btrfs-progs: check: modify check_fs_first_inode()
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases that the progress task can report.  The values index the label
 * table in print_status_check(), which has no entry for TASK_NOTHING.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* sentinel: must stay the last enumerator */
	TASK_NOTHING,
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/* Check implementations selectable by name, see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	/* returned for a mode string that is not recognised */
	CHECK_MODE_UNKNOWN,
	/* alias: the original mode is the default */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Currently selected mode; starts at the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
87 struct extent_backref {
88         struct rb_node node;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
/* Error bits for fs tree items; bit 0 is not assigned. */
#define ROOT_DIR_ERROR		(1<<1)	/* ROOT_DIR is bad */
#define DIR_ITEM_MISSING	(1<<2)	/* no DIR_ITEM found */
#define DIR_ITEM_MISMATCH	(1<<3)	/* DIR_ITEM found, content differs */
#define INODE_REF_MISSING	(1<<4)	/* no INODE_REF/INODE_EXTREF found */
#define INODE_ITEM_MISSING	(1<<5)	/* no INODE_ITEM found */
#define INODE_ITEM_MISMATCH	(1<<6)	/* INODE_ITEM found, content differs */
#define FILE_EXTENT_ERROR	(1<<7)	/* FILE_EXTENT is bad */
#define ODD_CSUM_ITEM		(1<<8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1<<9)	/* no CSUM_ITEM found */
#define LINK_COUNT_ERROR	(1<<10)	/* INODE_ITEM nlink is wrong */
#define NBYTES_ERROR		(1<<11)	/* INODE_ITEM nbytes is wrong */
#define ISIZE_ERROR		(1<<12)	/* INODE_ITEM size is wrong */
#define ORPHAN_ITEM		(1<<13)	/* INODE_ITEM without reference */
#define NO_INODE_ITEM		(1<<14)	/* no inode_item */
#define LAST_ITEM		(1<<15)	/* this tree traversal is complete */
#define ROOT_REF_MISSING	(1<<16)	/* no ROOT_REF found */
#define ROOT_REF_MISMATCH	(1<<17)	/* ROOT_REF found, content differs */
133
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
135 {
136         return container_of(back, struct data_backref, node);
137 }
138
139 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
140 {
141         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
142         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
143         struct data_backref *back1 = to_data_backref(ext1);
144         struct data_backref *back2 = to_data_backref(ext2);
145
146         WARN_ON(!ext1->is_data);
147         WARN_ON(!ext2->is_data);
148
149         /* parent and root are a union, so this covers both */
150         if (back1->parent > back2->parent)
151                 return 1;
152         if (back1->parent < back2->parent)
153                 return -1;
154
155         /* This is a full backref and the parents match. */
156         if (back1->node.full_backref)
157                 return 0;
158
159         if (back1->owner > back2->owner)
160                 return 1;
161         if (back1->owner < back2->owner)
162                 return -1;
163
164         if (back1->offset > back2->offset)
165                 return 1;
166         if (back1->offset < back2->offset)
167                 return -1;
168
169         if (back1->found_ref && back2->found_ref) {
170                 if (back1->disk_bytenr > back2->disk_bytenr)
171                         return 1;
172                 if (back1->disk_bytenr < back2->disk_bytenr)
173                         return -1;
174
175                 if (back1->bytes > back2->bytes)
176                         return 1;
177                 if (back1->bytes < back2->bytes)
178                         return -1;
179         }
180
181         return 0;
182 }
183
184 /*
185  * Much like data_backref, just removed the undetermined members
186  * and change it to use list_head.
187  * During extent scan, it is stored in root->orphan_data_extent.
188  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
189  */
190 struct orphan_data_extent {
191         struct list_head list;
192         u64 root;
193         u64 objectid;
194         u64 offset;
195         u64 disk_bytenr;
196         u64 disk_len;
197 };
198
199 struct tree_backref {
200         struct extent_backref node;
201         union {
202                 u64 parent;
203                 u64 root;
204         };
205 };
206
207 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
208 {
209         return container_of(back, struct tree_backref, node);
210 }
211
212 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
213 {
214         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
215         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
216         struct tree_backref *back1 = to_tree_backref(ext1);
217         struct tree_backref *back2 = to_tree_backref(ext2);
218
219         WARN_ON(ext1->is_data);
220         WARN_ON(ext2->is_data);
221
222         /* parent and root are a union, so this covers both */
223         if (back1->parent > back2->parent)
224                 return 1;
225         if (back1->parent < back2->parent)
226                 return -1;
227
228         return 0;
229 }
230
231 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
232 {
233         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
234         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
235
236         if (ext1->is_data > ext2->is_data)
237                 return 1;
238
239         if (ext1->is_data < ext2->is_data)
240                 return -1;
241
242         if (ext1->full_backref > ext2->full_backref)
243                 return 1;
244         if (ext1->full_backref < ext2->full_backref)
245                 return -1;
246
247         if (ext1->is_data)
248                 return compare_data_backref(node1, node2);
249         else
250                 return compare_tree_backref(node1, node2);
251 }
252
/*
 * Explicit "not yet determined" value for
 * extent_record::flag_block_full_backref, which is two bits wide.
 */
enum {
	FLAG_UNSET = 2
};
255
256 struct extent_record {
257         struct list_head backrefs;
258         struct list_head dups;
259         struct rb_root backref_tree;
260         struct list_head list;
261         struct cache_extent cache;
262         struct btrfs_disk_key parent_key;
263         u64 start;
264         u64 max_size;
265         u64 nr;
266         u64 refs;
267         u64 extent_item_refs;
268         u64 generation;
269         u64 parent_generation;
270         u64 info_objectid;
271         u32 num_duplicates;
272         u8 info_level;
273         unsigned int flag_block_full_backref:2;
274         unsigned int found_rec:1;
275         unsigned int content_checked:1;
276         unsigned int owner_ref_checked:1;
277         unsigned int is_root:1;
278         unsigned int metadata:1;
279         unsigned int bad_full_backref:1;
280         unsigned int crossing_stripes:1;
281         unsigned int wrong_chunk_type:1;
282 };
283
284 static inline struct extent_record* to_extent_record(struct list_head *entry)
285 {
286         return container_of(entry, struct extent_record, list);
287 }
288
289 struct inode_backref {
290         struct list_head list;
291         unsigned int found_dir_item:1;
292         unsigned int found_dir_index:1;
293         unsigned int found_inode_ref:1;
294         u8 filetype;
295         u8 ref_type;
296         int errors;
297         u64 dir;
298         u64 index;
299         u16 namelen;
300         char name[0];
301 };
302
303 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
304 {
305         return list_entry(entry, struct inode_backref, list);
306 }
307
308 struct root_item_record {
309         struct list_head list;
310         u64 objectid;
311         u64 bytenr;
312         u64 last_snapshot;
313         u8 level;
314         u8 drop_level;
315         struct btrfs_key drop_key;
316 };
317
/* Error bits for inode and root back references, see print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
331
332 struct file_extent_hole {
333         struct rb_node node;
334         u64 start;
335         u64 len;
336 };
337
338 struct inode_record {
339         struct list_head backrefs;
340         unsigned int checked:1;
341         unsigned int merging:1;
342         unsigned int found_inode_item:1;
343         unsigned int found_dir_item:1;
344         unsigned int found_file_extent:1;
345         unsigned int found_csum_item:1;
346         unsigned int some_csum_missing:1;
347         unsigned int nodatasum:1;
348         int errors;
349
350         u64 ino;
351         u32 nlink;
352         u32 imode;
353         u64 isize;
354         u64 nbytes;
355
356         u32 found_link;
357         u64 found_size;
358         u64 extent_start;
359         u64 extent_end;
360         struct rb_root holes;
361         struct list_head orphan_extents;
362
363         u32 refs;
364 };
365
/* Bits of inode_record::errors; decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
381
382 struct root_backref {
383         struct list_head list;
384         unsigned int found_dir_item:1;
385         unsigned int found_dir_index:1;
386         unsigned int found_back_ref:1;
387         unsigned int found_forward_ref:1;
388         unsigned int reachable:1;
389         int errors;
390         u64 ref_root;
391         u64 dir;
392         u64 index;
393         u16 namelen;
394         char name[0];
395 };
396
397 static inline struct root_backref* to_root_backref(struct list_head *entry)
398 {
399         return list_entry(entry, struct root_backref, list);
400 }
401
402 struct root_record {
403         struct list_head backrefs;
404         struct cache_extent cache;
405         unsigned int found_root_item:1;
406         u64 objectid;
407         u32 found_ref;
408 };
409
410 struct ptr_node {
411         struct cache_extent cache;
412         void *data;
413 };
414
415 struct shared_node {
416         struct cache_extent cache;
417         struct cache_tree root_cache;
418         struct cache_tree inode_cache;
419         struct inode_record *current;
420         u32 refs;
421 };
422
423 struct block_info {
424         u64 start;
425         u32 size;
426 };
427
428 struct walk_control {
429         struct cache_tree shared;
430         struct shared_node *nodes[BTRFS_MAX_LEVEL];
431         int active_node;
432         int root_level;
433 };
434
435 struct bad_item {
436         struct btrfs_key key;
437         u64 root_id;
438         struct list_head list;
439 };
440
441 struct extent_entry {
442         u64 bytenr;
443         u64 bytes;
444         int count;
445         int broken;
446         struct list_head list;
447 };
448
449 struct root_item_info {
450         /* level of the root */
451         u8 level;
452         /* number of nodes at this level, must be 1 for a root */
453         int node_count;
454         u64 bytenr;
455         u64 gen;
456         struct cache_extent cache_extent;
457 };
458
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about them yet.  Just internal use for error
 * classification.
 *
 * NOTE(review): REFERENCER_MISMATCH and CROSSING_STRIPE_BOUNDARY are both
 * (1 << 4), so the two conditions cannot be told apart in a combined mask.
 * The values are kept as they are; confirm whether the overlap is intended.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
475
476 static void *print_status_check(void *p)
477 {
478         struct task_ctx *priv = p;
479         const char work_indicator[] = { '.', 'o', 'O', 'o' };
480         uint32_t count = 0;
481         static char *task_position_string[] = {
482                 "checking extents",
483                 "checking free space cache",
484                 "checking fs roots",
485         };
486
487         task_period_start(priv->info, 1000 /* 1s */);
488
489         if (priv->tp == TASK_NOTHING)
490                 return NULL;
491
492         while (1) {
493                 printf("%s [%c]\r", task_position_string[priv->tp],
494                                 work_indicator[count % 4]);
495                 count++;
496                 fflush(stdout);
497                 task_period_wait(priv->info);
498         }
499         return NULL;
500 }
501
/*
 * Emit a newline on stdout and flush it.  @p is not used.
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	printf("\n");
	fflush(stdout);
	return 0;
}
509
510 static enum btrfs_check_mode parse_check_mode(const char *str)
511 {
512         if (strcmp(str, "lowmem") == 0)
513                 return CHECK_MODE_LOWMEM;
514         if (strcmp(str, "orig") == 0)
515                 return CHECK_MODE_ORIGINAL;
516         if (strcmp(str, "original") == 0)
517                 return CHECK_MODE_ORIGINAL;
518
519         return CHECK_MODE_UNKNOWN;
520 }
521
522 /* Compatible function to allow reuse of old codes */
523 static u64 first_extent_gap(struct rb_root *holes)
524 {
525         struct file_extent_hole *hole;
526
527         if (RB_EMPTY_ROOT(holes))
528                 return (u64)-1;
529
530         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
531         return hole->start;
532 }
533
534 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
535 {
536         struct file_extent_hole *hole1;
537         struct file_extent_hole *hole2;
538
539         hole1 = rb_entry(node1, struct file_extent_hole, node);
540         hole2 = rb_entry(node2, struct file_extent_hole, node);
541
542         if (hole1->start > hole2->start)
543                 return -1;
544         if (hole1->start < hole2->start)
545                 return 1;
546         /* Now hole1->start == hole2->start */
547         if (hole1->len >= hole2->len)
548                 /*
549                  * Hole 1 will be merge center
550                  * Same hole will be merged later
551                  */
552                 return -1;
553         /* Hole 2 will be merge center */
554         return 1;
555 }
556
557 /*
558  * Add a hole to the record
559  *
560  * This will do hole merge for copy_file_extent_holes(),
561  * which will ensure there won't be continuous holes.
562  */
563 static int add_file_extent_hole(struct rb_root *holes,
564                                 u64 start, u64 len)
565 {
566         struct file_extent_hole *hole;
567         struct file_extent_hole *prev = NULL;
568         struct file_extent_hole *next = NULL;
569
570         hole = malloc(sizeof(*hole));
571         if (!hole)
572                 return -ENOMEM;
573         hole->start = start;
574         hole->len = len;
575         /* Since compare will not return 0, no -EEXIST will happen */
576         rb_insert(holes, &hole->node, compare_hole);
577
578         /* simple merge with previous hole */
579         if (rb_prev(&hole->node))
580                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
581                                 node);
582         if (prev && prev->start + prev->len >= hole->start) {
583                 hole->len = hole->start + hole->len - prev->start;
584                 hole->start = prev->start;
585                 rb_erase(&prev->node, holes);
586                 free(prev);
587                 prev = NULL;
588         }
589
590         /* iterate merge with next holes */
591         while (1) {
592                 if (!rb_next(&hole->node))
593                         break;
594                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
595                                         node);
596                 if (hole->start + hole->len >= next->start) {
597                         if (hole->start + hole->len <= next->start + next->len)
598                                 hole->len = next->start + next->len -
599                                             hole->start;
600                         rb_erase(&next->node, holes);
601                         free(next);
602                         next = NULL;
603                 } else
604                         break;
605         }
606         return 0;
607 }
608
609 static int compare_hole_range(struct rb_node *node, void *data)
610 {
611         struct file_extent_hole *hole;
612         u64 start;
613
614         hole = (struct file_extent_hole *)data;
615         start = hole->start;
616
617         hole = rb_entry(node, struct file_extent_hole, node);
618         if (start < hole->start)
619                 return -1;
620         if (start >= hole->start && start < hole->start + hole->len)
621                 return 0;
622         return 1;
623 }
624
625 /*
626  * Delete a hole in the record
627  *
628  * This will do the hole split and is much restrict than add.
629  */
630 static int del_file_extent_hole(struct rb_root *holes,
631                                 u64 start, u64 len)
632 {
633         struct file_extent_hole *hole;
634         struct file_extent_hole tmp;
635         u64 prev_start = 0;
636         u64 prev_len = 0;
637         u64 next_start = 0;
638         u64 next_len = 0;
639         struct rb_node *node;
640         int have_prev = 0;
641         int have_next = 0;
642         int ret = 0;
643
644         tmp.start = start;
645         tmp.len = len;
646         node = rb_search(holes, &tmp, compare_hole_range, NULL);
647         if (!node)
648                 return -EEXIST;
649         hole = rb_entry(node, struct file_extent_hole, node);
650         if (start + len > hole->start + hole->len)
651                 return -EEXIST;
652
653         /*
654          * Now there will be no overlap, delete the hole and re-add the
655          * split(s) if they exists.
656          */
657         if (start > hole->start) {
658                 prev_start = hole->start;
659                 prev_len = start - hole->start;
660                 have_prev = 1;
661         }
662         if (hole->start + hole->len > start + len) {
663                 next_start = start + len;
664                 next_len = hole->start + hole->len - start - len;
665                 have_next = 1;
666         }
667         rb_erase(node, holes);
668         free(hole);
669         if (have_prev) {
670                 ret = add_file_extent_hole(holes, prev_start, prev_len);
671                 if (ret < 0)
672                         return ret;
673         }
674         if (have_next) {
675                 ret = add_file_extent_hole(holes, next_start, next_len);
676                 if (ret < 0)
677                         return ret;
678         }
679         return 0;
680 }
681
682 static int copy_file_extent_holes(struct rb_root *dst,
683                                   struct rb_root *src)
684 {
685         struct file_extent_hole *hole;
686         struct rb_node *node;
687         int ret = 0;
688
689         node = rb_first(src);
690         while (node) {
691                 hole = rb_entry(node, struct file_extent_hole, node);
692                 ret = add_file_extent_hole(dst, hole->start, hole->len);
693                 if (ret)
694                         break;
695                 node = rb_next(node);
696         }
697         return ret;
698 }
699
700 static void free_file_extent_holes(struct rb_root *holes)
701 {
702         struct rb_node *node;
703         struct file_extent_hole *hole;
704
705         node = rb_first(holes);
706         while (node) {
707                 hole = rb_entry(node, struct file_extent_hole, node);
708                 rb_erase(node, holes);
709                 free(hole);
710                 node = rb_first(holes);
711         }
712 }
713
714 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
715
716 static void record_root_in_trans(struct btrfs_trans_handle *trans,
717                                  struct btrfs_root *root)
718 {
719         if (root->last_trans != trans->transid) {
720                 root->track_dirty = 1;
721                 root->last_trans = trans->transid;
722                 root->commit_root = root->node;
723                 extent_buffer_get(root->node);
724         }
725 }
726
727 static u8 imode_to_type(u32 imode)
728 {
729 #define S_SHIFT 12
730         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
731                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
732                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
733                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
734                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
735                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
736                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
737                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
738         };
739
740         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
741 #undef S_SHIFT
742 }
743
744 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
745 {
746         struct device_record *rec1;
747         struct device_record *rec2;
748
749         rec1 = rb_entry(node1, struct device_record, node);
750         rec2 = rb_entry(node2, struct device_record, node);
751         if (rec1->devid > rec2->devid)
752                 return -1;
753         else if (rec1->devid < rec2->devid)
754                 return 1;
755         else
756                 return 0;
757 }
758
759 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
760 {
761         struct inode_record *rec;
762         struct inode_backref *backref;
763         struct inode_backref *orig;
764         struct inode_backref *tmp;
765         struct orphan_data_extent *src_orphan;
766         struct orphan_data_extent *dst_orphan;
767         struct rb_node *rb;
768         size_t size;
769         int ret;
770
771         rec = malloc(sizeof(*rec));
772         if (!rec)
773                 return ERR_PTR(-ENOMEM);
774         memcpy(rec, orig_rec, sizeof(*rec));
775         rec->refs = 1;
776         INIT_LIST_HEAD(&rec->backrefs);
777         INIT_LIST_HEAD(&rec->orphan_extents);
778         rec->holes = RB_ROOT;
779
780         list_for_each_entry(orig, &orig_rec->backrefs, list) {
781                 size = sizeof(*orig) + orig->namelen + 1;
782                 backref = malloc(size);
783                 if (!backref) {
784                         ret = -ENOMEM;
785                         goto cleanup;
786                 }
787                 memcpy(backref, orig, size);
788                 list_add_tail(&backref->list, &rec->backrefs);
789         }
790         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
791                 dst_orphan = malloc(sizeof(*dst_orphan));
792                 if (!dst_orphan) {
793                         ret = -ENOMEM;
794                         goto cleanup;
795                 }
796                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
797                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
798         }
799         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
800         if (ret < 0)
801                 goto cleanup_rb;
802
803         return rec;
804
805 cleanup_rb:
806         rb = rb_first(&rec->holes);
807         while (rb) {
808                 struct file_extent_hole *hole;
809
810                 hole = rb_entry(rb, struct file_extent_hole, node);
811                 rb = rb_next(rb);
812                 free(hole);
813         }
814
815 cleanup:
816         if (!list_empty(&rec->backrefs))
817                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
818                         list_del(&orig->list);
819                         free(orig);
820                 }
821
822         if (!list_empty(&rec->orphan_extents))
823                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
824                         list_del(&orig->list);
825                         free(orig);
826                 }
827
828         free(rec);
829
830         return ERR_PTR(ret);
831 }
832
833 static void print_orphan_data_extents(struct list_head *orphan_extents,
834                                       u64 objectid)
835 {
836         struct orphan_data_extent *orphan;
837
838         if (list_empty(orphan_extents))
839                 return;
840         printf("The following data extent is lost in tree %llu:\n",
841                objectid);
842         list_for_each_entry(orphan, orphan_extents, list) {
843                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
844                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
845                        orphan->disk_len);
846         }
847 }
848
849 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
850 {
851         u64 root_objectid = root->root_key.objectid;
852         int errors = rec->errors;
853
854         if (!errors)
855                 return;
856         /* reloc root errors, we print its corresponding fs root objectid*/
857         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
858                 root_objectid = root->root_key.offset;
859                 fprintf(stderr, "reloc");
860         }
861         fprintf(stderr, "root %llu inode %llu errors %x",
862                 (unsigned long long) root_objectid,
863                 (unsigned long long) rec->ino, rec->errors);
864
865         if (errors & I_ERR_NO_INODE_ITEM)
866                 fprintf(stderr, ", no inode item");
867         if (errors & I_ERR_NO_ORPHAN_ITEM)
868                 fprintf(stderr, ", no orphan item");
869         if (errors & I_ERR_DUP_INODE_ITEM)
870                 fprintf(stderr, ", dup inode item");
871         if (errors & I_ERR_DUP_DIR_INDEX)
872                 fprintf(stderr, ", dup dir index");
873         if (errors & I_ERR_ODD_DIR_ITEM)
874                 fprintf(stderr, ", odd dir item");
875         if (errors & I_ERR_ODD_FILE_EXTENT)
876                 fprintf(stderr, ", odd file extent");
877         if (errors & I_ERR_BAD_FILE_EXTENT)
878                 fprintf(stderr, ", bad file extent");
879         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
880                 fprintf(stderr, ", file extent overlap");
881         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
882                 fprintf(stderr, ", file extent discount");
883         if (errors & I_ERR_DIR_ISIZE_WRONG)
884                 fprintf(stderr, ", dir isize wrong");
885         if (errors & I_ERR_FILE_NBYTES_WRONG)
886                 fprintf(stderr, ", nbytes wrong");
887         if (errors & I_ERR_ODD_CSUM_ITEM)
888                 fprintf(stderr, ", odd csum item");
889         if (errors & I_ERR_SOME_CSUM_MISSING)
890                 fprintf(stderr, ", some csum missing");
891         if (errors & I_ERR_LINK_COUNT_WRONG)
892                 fprintf(stderr, ", link count wrong");
893         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
894                 fprintf(stderr, ", orphan file extent");
895         fprintf(stderr, "\n");
896         /* Print the orphan extents if needed */
897         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
899
900         /* Print the holes if needed */
901         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
902                 struct file_extent_hole *hole;
903                 struct rb_node *node;
904                 int found = 0;
905
906                 node = rb_first(&rec->holes);
907                 fprintf(stderr, "Found file extent holes:\n");
908                 while (node) {
909                         found = 1;
910                         hole = rb_entry(node, struct file_extent_hole, node);
911                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
912                                 hole->start, hole->len);
913                         node = rb_next(node);
914                 }
915                 if (!found)
916                         fprintf(stderr, "\tstart: 0, len: %llu\n",
917                                 round_up(rec->isize,
918                                          root->fs_info->sectorsize));
919         }
920 }
921
922 static void print_ref_error(int errors)
923 {
924         if (errors & REF_ERR_NO_DIR_ITEM)
925                 fprintf(stderr, ", no dir item");
926         if (errors & REF_ERR_NO_DIR_INDEX)
927                 fprintf(stderr, ", no dir index");
928         if (errors & REF_ERR_NO_INODE_REF)
929                 fprintf(stderr, ", no inode ref");
930         if (errors & REF_ERR_DUP_DIR_ITEM)
931                 fprintf(stderr, ", dup dir item");
932         if (errors & REF_ERR_DUP_DIR_INDEX)
933                 fprintf(stderr, ", dup dir index");
934         if (errors & REF_ERR_DUP_INODE_REF)
935                 fprintf(stderr, ", dup inode ref");
936         if (errors & REF_ERR_INDEX_UNMATCH)
937                 fprintf(stderr, ", index mismatch");
938         if (errors & REF_ERR_FILETYPE_UNMATCH)
939                 fprintf(stderr, ", filetype mismatch");
940         if (errors & REF_ERR_NAME_TOO_LONG)
941                 fprintf(stderr, ", name too long");
942         if (errors & REF_ERR_NO_ROOT_REF)
943                 fprintf(stderr, ", no root ref");
944         if (errors & REF_ERR_NO_ROOT_BACKREF)
945                 fprintf(stderr, ", no root backref");
946         if (errors & REF_ERR_DUP_ROOT_REF)
947                 fprintf(stderr, ", dup root ref");
948         if (errors & REF_ERR_DUP_ROOT_BACKREF)
949                 fprintf(stderr, ", dup root backref");
950         fprintf(stderr, "\n");
951 }
952
953 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
954                                           u64 ino, int mod)
955 {
956         struct ptr_node *node;
957         struct cache_extent *cache;
958         struct inode_record *rec = NULL;
959         int ret;
960
961         cache = lookup_cache_extent(inode_cache, ino, 1);
962         if (cache) {
963                 node = container_of(cache, struct ptr_node, cache);
964                 rec = node->data;
965                 if (mod && rec->refs > 1) {
966                         node->data = clone_inode_rec(rec);
967                         if (IS_ERR(node->data))
968                                 return node->data;
969                         rec->refs--;
970                         rec = node->data;
971                 }
972         } else if (mod) {
973                 rec = calloc(1, sizeof(*rec));
974                 if (!rec)
975                         return ERR_PTR(-ENOMEM);
976                 rec->ino = ino;
977                 rec->extent_start = (u64)-1;
978                 rec->refs = 1;
979                 INIT_LIST_HEAD(&rec->backrefs);
980                 INIT_LIST_HEAD(&rec->orphan_extents);
981                 rec->holes = RB_ROOT;
982
983                 node = malloc(sizeof(*node));
984                 if (!node) {
985                         free(rec);
986                         return ERR_PTR(-ENOMEM);
987                 }
988                 node->cache.start = ino;
989                 node->cache.size = 1;
990                 node->data = rec;
991
992                 if (ino == BTRFS_FREE_INO_OBJECTID)
993                         rec->found_link = 1;
994
995                 ret = insert_cache_extent(inode_cache, &node->cache);
996                 if (ret)
997                         return ERR_PTR(-EEXIST);
998         }
999         return rec;
1000 }
1001
1002 static void free_orphan_data_extents(struct list_head *orphan_extents)
1003 {
1004         struct orphan_data_extent *orphan;
1005
1006         while (!list_empty(orphan_extents)) {
1007                 orphan = list_entry(orphan_extents->next,
1008                                     struct orphan_data_extent, list);
1009                 list_del(&orphan->list);
1010                 free(orphan);
1011         }
1012 }
1013
1014 static void free_inode_rec(struct inode_record *rec)
1015 {
1016         struct inode_backref *backref;
1017
1018         if (--rec->refs > 0)
1019                 return;
1020
1021         while (!list_empty(&rec->backrefs)) {
1022                 backref = to_inode_backref(rec->backrefs.next);
1023                 list_del(&backref->list);
1024                 free(backref);
1025         }
1026         free_orphan_data_extents(&rec->orphan_extents);
1027         free_file_extent_holes(&rec->holes);
1028         free(rec);
1029 }
1030
1031 static int can_free_inode_rec(struct inode_record *rec)
1032 {
1033         if (!rec->errors && rec->checked && rec->found_inode_item &&
1034             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1035                 return 1;
1036         return 0;
1037 }
1038
1039 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1040                                  struct inode_record *rec)
1041 {
1042         struct cache_extent *cache;
1043         struct inode_backref *tmp, *backref;
1044         struct ptr_node *node;
1045         u8 filetype;
1046
1047         if (!rec->found_inode_item)
1048                 return;
1049
1050         filetype = imode_to_type(rec->imode);
1051         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1052                 if (backref->found_dir_item && backref->found_dir_index) {
1053                         if (backref->filetype != filetype)
1054                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1055                         if (!backref->errors && backref->found_inode_ref &&
1056                             rec->nlink == rec->found_link) {
1057                                 list_del(&backref->list);
1058                                 free(backref);
1059                         }
1060                 }
1061         }
1062
1063         if (!rec->checked || rec->merging)
1064                 return;
1065
1066         if (S_ISDIR(rec->imode)) {
1067                 if (rec->found_size != rec->isize)
1068                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1069                 if (rec->found_file_extent)
1070                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1071         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1072                 if (rec->found_dir_item)
1073                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1074                 if (rec->found_size != rec->nbytes)
1075                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1076                 if (rec->nlink > 0 && !no_holes &&
1077                     (rec->extent_end < rec->isize ||
1078                      first_extent_gap(&rec->holes) < rec->isize))
1079                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1080         }
1081
1082         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1083                 if (rec->found_csum_item && rec->nodatasum)
1084                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1085                 if (rec->some_csum_missing && !rec->nodatasum)
1086                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1087         }
1088
1089         BUG_ON(rec->refs != 1);
1090         if (can_free_inode_rec(rec)) {
1091                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1092                 node = container_of(cache, struct ptr_node, cache);
1093                 BUG_ON(node->data != rec);
1094                 remove_cache_extent(inode_cache, &node->cache);
1095                 free(node);
1096                 free_inode_rec(rec);
1097         }
1098 }
1099
1100 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1101 {
1102         struct btrfs_path path;
1103         struct btrfs_key key;
1104         int ret;
1105
1106         key.objectid = BTRFS_ORPHAN_OBJECTID;
1107         key.type = BTRFS_ORPHAN_ITEM_KEY;
1108         key.offset = ino;
1109
1110         btrfs_init_path(&path);
1111         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1112         btrfs_release_path(&path);
1113         if (ret > 0)
1114                 ret = -ENOENT;
1115         return ret;
1116 }
1117
1118 static int process_inode_item(struct extent_buffer *eb,
1119                               int slot, struct btrfs_key *key,
1120                               struct shared_node *active_node)
1121 {
1122         struct inode_record *rec;
1123         struct btrfs_inode_item *item;
1124
1125         rec = active_node->current;
1126         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1127         if (rec->found_inode_item) {
1128                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1129                 return 1;
1130         }
1131         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1132         rec->nlink = btrfs_inode_nlink(eb, item);
1133         rec->isize = btrfs_inode_size(eb, item);
1134         rec->nbytes = btrfs_inode_nbytes(eb, item);
1135         rec->imode = btrfs_inode_mode(eb, item);
1136         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1137                 rec->nodatasum = 1;
1138         rec->found_inode_item = 1;
1139         if (rec->nlink == 0)
1140                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1141         maybe_free_inode_rec(&active_node->inode_cache, rec);
1142         return 0;
1143 }
1144
1145 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1146                                                 const char *name,
1147                                                 int namelen, u64 dir)
1148 {
1149         struct inode_backref *backref;
1150
1151         list_for_each_entry(backref, &rec->backrefs, list) {
1152                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1153                         break;
1154                 if (backref->dir != dir || backref->namelen != namelen)
1155                         continue;
1156                 if (memcmp(name, backref->name, namelen))
1157                         continue;
1158                 return backref;
1159         }
1160
1161         backref = malloc(sizeof(*backref) + namelen + 1);
1162         if (!backref)
1163                 return NULL;
1164         memset(backref, 0, sizeof(*backref));
1165         backref->dir = dir;
1166         backref->namelen = namelen;
1167         memcpy(backref->name, name, namelen);
1168         backref->name[namelen] = '\0';
1169         list_add_tail(&backref->list, &rec->backrefs);
1170         return backref;
1171 }
1172
1173 static int add_inode_backref(struct cache_tree *inode_cache,
1174                              u64 ino, u64 dir, u64 index,
1175                              const char *name, int namelen,
1176                              u8 filetype, u8 itemtype, int errors)
1177 {
1178         struct inode_record *rec;
1179         struct inode_backref *backref;
1180
1181         rec = get_inode_rec(inode_cache, ino, 1);
1182         BUG_ON(IS_ERR(rec));
1183         backref = get_inode_backref(rec, name, namelen, dir);
1184         BUG_ON(!backref);
1185         if (errors)
1186                 backref->errors |= errors;
1187         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1188                 if (backref->found_dir_index)
1189                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1190                 if (backref->found_inode_ref && backref->index != index)
1191                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1192                 if (backref->found_dir_item && backref->filetype != filetype)
1193                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1194
1195                 backref->index = index;
1196                 backref->filetype = filetype;
1197                 backref->found_dir_index = 1;
1198         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1199                 rec->found_link++;
1200                 if (backref->found_dir_item)
1201                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1202                 if (backref->found_dir_index && backref->filetype != filetype)
1203                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1204
1205                 backref->filetype = filetype;
1206                 backref->found_dir_item = 1;
1207         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1208                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1209                 if (backref->found_inode_ref)
1210                         backref->errors |= REF_ERR_DUP_INODE_REF;
1211                 if (backref->found_dir_index && backref->index != index)
1212                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1213                 else
1214                         backref->index = index;
1215
1216                 backref->ref_type = itemtype;
1217                 backref->found_inode_ref = 1;
1218         } else {
1219                 BUG_ON(1);
1220         }
1221
1222         maybe_free_inode_rec(inode_cache, rec);
1223         return 0;
1224 }
1225
1226 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1227                             struct cache_tree *dst_cache)
1228 {
1229         struct inode_backref *backref;
1230         u32 dir_count = 0;
1231         int ret = 0;
1232
1233         dst->merging = 1;
1234         list_for_each_entry(backref, &src->backrefs, list) {
1235                 if (backref->found_dir_index) {
1236                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1237                                         backref->index, backref->name,
1238                                         backref->namelen, backref->filetype,
1239                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1240                 }
1241                 if (backref->found_dir_item) {
1242                         dir_count++;
1243                         add_inode_backref(dst_cache, dst->ino,
1244                                         backref->dir, 0, backref->name,
1245                                         backref->namelen, backref->filetype,
1246                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1247                 }
1248                 if (backref->found_inode_ref) {
1249                         add_inode_backref(dst_cache, dst->ino,
1250                                         backref->dir, backref->index,
1251                                         backref->name, backref->namelen, 0,
1252                                         backref->ref_type, backref->errors);
1253                 }
1254         }
1255
1256         if (src->found_dir_item)
1257                 dst->found_dir_item = 1;
1258         if (src->found_file_extent)
1259                 dst->found_file_extent = 1;
1260         if (src->found_csum_item)
1261                 dst->found_csum_item = 1;
1262         if (src->some_csum_missing)
1263                 dst->some_csum_missing = 1;
1264         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1265                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1266                 if (ret < 0)
1267                         return ret;
1268         }
1269
1270         BUG_ON(src->found_link < dir_count);
1271         dst->found_link += src->found_link - dir_count;
1272         dst->found_size += src->found_size;
1273         if (src->extent_start != (u64)-1) {
1274                 if (dst->extent_start == (u64)-1) {
1275                         dst->extent_start = src->extent_start;
1276                         dst->extent_end = src->extent_end;
1277                 } else {
1278                         if (dst->extent_end > src->extent_start)
1279                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1280                         else if (dst->extent_end < src->extent_start) {
1281                                 ret = add_file_extent_hole(&dst->holes,
1282                                         dst->extent_end,
1283                                         src->extent_start - dst->extent_end);
1284                         }
1285                         if (dst->extent_end < src->extent_end)
1286                                 dst->extent_end = src->extent_end;
1287                 }
1288         }
1289
1290         dst->errors |= src->errors;
1291         if (src->found_inode_item) {
1292                 if (!dst->found_inode_item) {
1293                         dst->nlink = src->nlink;
1294                         dst->isize = src->isize;
1295                         dst->nbytes = src->nbytes;
1296                         dst->imode = src->imode;
1297                         dst->nodatasum = src->nodatasum;
1298                         dst->found_inode_item = 1;
1299                 } else {
1300                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1301                 }
1302         }
1303         dst->merging = 0;
1304
1305         return 0;
1306 }
1307
1308 static int splice_shared_node(struct shared_node *src_node,
1309                               struct shared_node *dst_node)
1310 {
1311         struct cache_extent *cache;
1312         struct ptr_node *node, *ins;
1313         struct cache_tree *src, *dst;
1314         struct inode_record *rec, *conflict;
1315         u64 current_ino = 0;
1316         int splice = 0;
1317         int ret;
1318
1319         if (--src_node->refs == 0)
1320                 splice = 1;
1321         if (src_node->current)
1322                 current_ino = src_node->current->ino;
1323
1324         src = &src_node->root_cache;
1325         dst = &dst_node->root_cache;
1326 again:
1327         cache = search_cache_extent(src, 0);
1328         while (cache) {
1329                 node = container_of(cache, struct ptr_node, cache);
1330                 rec = node->data;
1331                 cache = next_cache_extent(cache);
1332
1333                 if (splice) {
1334                         remove_cache_extent(src, &node->cache);
1335                         ins = node;
1336                 } else {
1337                         ins = malloc(sizeof(*ins));
1338                         BUG_ON(!ins);
1339                         ins->cache.start = node->cache.start;
1340                         ins->cache.size = node->cache.size;
1341                         ins->data = rec;
1342                         rec->refs++;
1343                 }
1344                 ret = insert_cache_extent(dst, &ins->cache);
1345                 if (ret == -EEXIST) {
1346                         conflict = get_inode_rec(dst, rec->ino, 1);
1347                         BUG_ON(IS_ERR(conflict));
1348                         merge_inode_recs(rec, conflict, dst);
1349                         if (rec->checked) {
1350                                 conflict->checked = 1;
1351                                 if (dst_node->current == conflict)
1352                                         dst_node->current = NULL;
1353                         }
1354                         maybe_free_inode_rec(dst, conflict);
1355                         free_inode_rec(rec);
1356                         free(ins);
1357                 } else {
1358                         BUG_ON(ret);
1359                 }
1360         }
1361
1362         if (src == &src_node->root_cache) {
1363                 src = &src_node->inode_cache;
1364                 dst = &dst_node->inode_cache;
1365                 goto again;
1366         }
1367
1368         if (current_ino > 0 && (!dst_node->current ||
1369             current_ino > dst_node->current->ino)) {
1370                 if (dst_node->current) {
1371                         dst_node->current->checked = 1;
1372                         maybe_free_inode_rec(dst, dst_node->current);
1373                 }
1374                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1375                 BUG_ON(IS_ERR(dst_node->current));
1376         }
1377         return 0;
1378 }
1379
1380 static void free_inode_ptr(struct cache_extent *cache)
1381 {
1382         struct ptr_node *node;
1383         struct inode_record *rec;
1384
1385         node = container_of(cache, struct ptr_node, cache);
1386         rec = node->data;
1387         free_inode_rec(rec);
1388         free(node);
1389 }
1390
1391 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1392
1393 static struct shared_node *find_shared_node(struct cache_tree *shared,
1394                                             u64 bytenr)
1395 {
1396         struct cache_extent *cache;
1397         struct shared_node *node;
1398
1399         cache = lookup_cache_extent(shared, bytenr, 1);
1400         if (cache) {
1401                 node = container_of(cache, struct shared_node, cache);
1402                 return node;
1403         }
1404         return NULL;
1405 }
1406
1407 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1408 {
1409         int ret;
1410         struct shared_node *node;
1411
1412         node = calloc(1, sizeof(*node));
1413         if (!node)
1414                 return -ENOMEM;
1415         node->cache.start = bytenr;
1416         node->cache.size = 1;
1417         cache_tree_init(&node->root_cache);
1418         cache_tree_init(&node->inode_cache);
1419         node->refs = refs;
1420
1421         ret = insert_cache_extent(shared, &node->cache);
1422
1423         return ret;
1424 }
1425
1426 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1427                              struct walk_control *wc, int level)
1428 {
1429         struct shared_node *node;
1430         struct shared_node *dest;
1431         int ret;
1432
1433         if (level == wc->active_node)
1434                 return 0;
1435
1436         BUG_ON(wc->active_node <= level);
1437         node = find_shared_node(&wc->shared, bytenr);
1438         if (!node) {
1439                 ret = add_shared_node(&wc->shared, bytenr, refs);
1440                 BUG_ON(ret);
1441                 node = find_shared_node(&wc->shared, bytenr);
1442                 wc->nodes[level] = node;
1443                 wc->active_node = level;
1444                 return 0;
1445         }
1446
1447         if (wc->root_level == wc->active_node &&
1448             btrfs_root_refs(&root->root_item) == 0) {
1449                 if (--node->refs == 0) {
1450                         free_inode_recs_tree(&node->root_cache);
1451                         free_inode_recs_tree(&node->inode_cache);
1452                         remove_cache_extent(&wc->shared, &node->cache);
1453                         free(node);
1454                 }
1455                 return 1;
1456         }
1457
1458         dest = wc->nodes[wc->active_node];
1459         splice_shared_node(node, dest);
1460         if (node->refs == 0) {
1461                 remove_cache_extent(&wc->shared, &node->cache);
1462                 free(node);
1463         }
1464         return 1;
1465 }
1466
1467 static int leave_shared_node(struct btrfs_root *root,
1468                              struct walk_control *wc, int level)
1469 {
1470         struct shared_node *node;
1471         struct shared_node *dest;
1472         int i;
1473
1474         if (level == wc->root_level)
1475                 return 0;
1476
1477         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1478                 if (wc->nodes[i])
1479                         break;
1480         }
1481         BUG_ON(i >= BTRFS_MAX_LEVEL);
1482
1483         node = wc->nodes[wc->active_node];
1484         wc->nodes[wc->active_node] = NULL;
1485         wc->active_node = i;
1486
1487         dest = wc->nodes[wc->active_node];
1488         if (wc->active_node < wc->root_level ||
1489             btrfs_root_refs(&root->root_item) > 0) {
1490                 BUG_ON(node->refs <= 1);
1491                 splice_shared_node(node, dest);
1492         } else {
1493                 BUG_ON(node->refs < 2);
1494                 node->refs--;
1495         }
1496         return 0;
1497 }
1498
1499 /*
1500  * Returns:
1501  * < 0 - on error
1502  * 1   - if the root with id child_root_id is a child of root parent_root_id
1503  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1504  *       has other root(s) as parent(s)
1505  * 2   - if the root child_root_id doesn't have any parent roots
1506  */
1507 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1508                          u64 child_root_id)
1509 {
1510         struct btrfs_path path;
1511         struct btrfs_key key;
1512         struct extent_buffer *leaf;
1513         int has_parent = 0;
1514         int ret;
1515
1516         btrfs_init_path(&path);
1517
1518         key.objectid = parent_root_id;
1519         key.type = BTRFS_ROOT_REF_KEY;
1520         key.offset = child_root_id;
1521         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1522                                 0, 0);
1523         if (ret < 0)
1524                 return ret;
1525         btrfs_release_path(&path);
1526         if (!ret)
1527                 return 1;
1528
1529         key.objectid = child_root_id;
1530         key.type = BTRFS_ROOT_BACKREF_KEY;
1531         key.offset = 0;
1532         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1533                                 0, 0);
1534         if (ret < 0)
1535                 goto out;
1536
1537         while (1) {
1538                 leaf = path.nodes[0];
1539                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1540                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1541                         if (ret)
1542                                 break;
1543                         leaf = path.nodes[0];
1544                 }
1545
1546                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1547                 if (key.objectid != child_root_id ||
1548                     key.type != BTRFS_ROOT_BACKREF_KEY)
1549                         break;
1550
1551                 has_parent = 1;
1552
1553                 if (key.offset == parent_root_id) {
1554                         btrfs_release_path(&path);
1555                         return 1;
1556                 }
1557
1558                 path.slots[0]++;
1559         }
1560 out:
1561         btrfs_release_path(&path);
1562         if (ret < 0)
1563                 return ret;
1564         return has_parent ? 0 : 2;
1565 }
1566
1567 static int process_dir_item(struct extent_buffer *eb,
1568                             int slot, struct btrfs_key *key,
1569                             struct shared_node *active_node)
1570 {
1571         u32 total;
1572         u32 cur = 0;
1573         u32 len;
1574         u32 name_len;
1575         u32 data_len;
1576         int error;
1577         int nritems = 0;
1578         u8 filetype;
1579         struct btrfs_dir_item *di;
1580         struct inode_record *rec;
1581         struct cache_tree *root_cache;
1582         struct cache_tree *inode_cache;
1583         struct btrfs_key location;
1584         char namebuf[BTRFS_NAME_LEN];
1585
1586         root_cache = &active_node->root_cache;
1587         inode_cache = &active_node->inode_cache;
1588         rec = active_node->current;
1589         rec->found_dir_item = 1;
1590
1591         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1592         total = btrfs_item_size_nr(eb, slot);
1593         while (cur < total) {
1594                 nritems++;
1595                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1596                 name_len = btrfs_dir_name_len(eb, di);
1597                 data_len = btrfs_dir_data_len(eb, di);
1598                 filetype = btrfs_dir_type(eb, di);
1599
1600                 rec->found_size += name_len;
1601                 if (cur + sizeof(*di) + name_len > total ||
1602                     name_len > BTRFS_NAME_LEN) {
1603                         error = REF_ERR_NAME_TOO_LONG;
1604
1605                         if (cur + sizeof(*di) > total)
1606                                 break;
1607                         len = min_t(u32, total - cur - sizeof(*di),
1608                                     BTRFS_NAME_LEN);
1609                 } else {
1610                         len = name_len;
1611                         error = 0;
1612                 }
1613
1614                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1615
1616                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1617                     key->offset != btrfs_name_hash(namebuf, len)) {
1618                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1619                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1620                         key->objectid, key->offset, namebuf, len, filetype,
1621                         key->offset, btrfs_name_hash(namebuf, len));
1622                 }
1623
1624                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1625                         add_inode_backref(inode_cache, location.objectid,
1626                                           key->objectid, key->offset, namebuf,
1627                                           len, filetype, key->type, error);
1628                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1629                         add_inode_backref(root_cache, location.objectid,
1630                                           key->objectid, key->offset,
1631                                           namebuf, len, filetype,
1632                                           key->type, error);
1633                 } else {
1634                         fprintf(stderr, "invalid location in dir item %u\n",
1635                                 location.type);
1636                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1637                                           key->objectid, key->offset, namebuf,
1638                                           len, filetype, key->type, error);
1639                 }
1640
1641                 len = sizeof(*di) + name_len + data_len;
1642                 di = (struct btrfs_dir_item *)((char *)di + len);
1643                 cur += len;
1644         }
1645         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1646                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1647
1648         return 0;
1649 }
1650
1651 static int process_inode_ref(struct extent_buffer *eb,
1652                              int slot, struct btrfs_key *key,
1653                              struct shared_node *active_node)
1654 {
1655         u32 total;
1656         u32 cur = 0;
1657         u32 len;
1658         u32 name_len;
1659         u64 index;
1660         int error;
1661         struct cache_tree *inode_cache;
1662         struct btrfs_inode_ref *ref;
1663         char namebuf[BTRFS_NAME_LEN];
1664
1665         inode_cache = &active_node->inode_cache;
1666
1667         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1668         total = btrfs_item_size_nr(eb, slot);
1669         while (cur < total) {
1670                 name_len = btrfs_inode_ref_name_len(eb, ref);
1671                 index = btrfs_inode_ref_index(eb, ref);
1672
1673                 /* inode_ref + namelen should not cross item boundary */
1674                 if (cur + sizeof(*ref) + name_len > total ||
1675                     name_len > BTRFS_NAME_LEN) {
1676                         if (total < cur + sizeof(*ref))
1677                                 break;
1678
1679                         /* Still try to read out the remaining part */
1680                         len = min_t(u32, total - cur - sizeof(*ref),
1681                                     BTRFS_NAME_LEN);
1682                         error = REF_ERR_NAME_TOO_LONG;
1683                 } else {
1684                         len = name_len;
1685                         error = 0;
1686                 }
1687
1688                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1689                 add_inode_backref(inode_cache, key->objectid, key->offset,
1690                                   index, namebuf, len, 0, key->type, error);
1691
1692                 len = sizeof(*ref) + name_len;
1693                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1694                 cur += len;
1695         }
1696         return 0;
1697 }
1698
1699 static int process_inode_extref(struct extent_buffer *eb,
1700                                 int slot, struct btrfs_key *key,
1701                                 struct shared_node *active_node)
1702 {
1703         u32 total;
1704         u32 cur = 0;
1705         u32 len;
1706         u32 name_len;
1707         u64 index;
1708         u64 parent;
1709         int error;
1710         struct cache_tree *inode_cache;
1711         struct btrfs_inode_extref *extref;
1712         char namebuf[BTRFS_NAME_LEN];
1713
1714         inode_cache = &active_node->inode_cache;
1715
1716         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1717         total = btrfs_item_size_nr(eb, slot);
1718         while (cur < total) {
1719                 name_len = btrfs_inode_extref_name_len(eb, extref);
1720                 index = btrfs_inode_extref_index(eb, extref);
1721                 parent = btrfs_inode_extref_parent(eb, extref);
1722                 if (name_len <= BTRFS_NAME_LEN) {
1723                         len = name_len;
1724                         error = 0;
1725                 } else {
1726                         len = BTRFS_NAME_LEN;
1727                         error = REF_ERR_NAME_TOO_LONG;
1728                 }
1729                 read_extent_buffer(eb, namebuf,
1730                                    (unsigned long)(extref + 1), len);
1731                 add_inode_backref(inode_cache, key->objectid, parent,
1732                                   index, namebuf, len, 0, key->type, error);
1733
1734                 len = sizeof(*extref) + name_len;
1735                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1736                 cur += len;
1737         }
1738         return 0;
1739
1740 }
1741
1742 static int count_csum_range(struct btrfs_root *root, u64 start,
1743                             u64 len, u64 *found)
1744 {
1745         struct btrfs_key key;
1746         struct btrfs_path path;
1747         struct extent_buffer *leaf;
1748         int ret;
1749         size_t size;
1750         *found = 0;
1751         u64 csum_end;
1752         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1753
1754         btrfs_init_path(&path);
1755
1756         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1757         key.offset = start;
1758         key.type = BTRFS_EXTENT_CSUM_KEY;
1759
1760         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1761                                 &key, &path, 0, 0);
1762         if (ret < 0)
1763                 goto out;
1764         if (ret > 0 && path.slots[0] > 0) {
1765                 leaf = path.nodes[0];
1766                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1767                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1768                     key.type == BTRFS_EXTENT_CSUM_KEY)
1769                         path.slots[0]--;
1770         }
1771
1772         while (len > 0) {
1773                 leaf = path.nodes[0];
1774                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1775                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1776                         if (ret > 0)
1777                                 break;
1778                         else if (ret < 0)
1779                                 goto out;
1780                         leaf = path.nodes[0];
1781                 }
1782
1783                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1784                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1785                     key.type != BTRFS_EXTENT_CSUM_KEY)
1786                         break;
1787
1788                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1789                 if (key.offset >= start + len)
1790                         break;
1791
1792                 if (key.offset > start)
1793                         start = key.offset;
1794
1795                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1796                 csum_end = key.offset + (size / csum_size) *
1797                            root->fs_info->sectorsize;
1798                 if (csum_end > start) {
1799                         size = min(csum_end - start, len);
1800                         len -= size;
1801                         start += size;
1802                         *found += size;
1803                 }
1804
1805                 path.slots[0]++;
1806         }
1807 out:
1808         btrfs_release_path(&path);
1809         if (ret < 0)
1810                 return ret;
1811         return 0;
1812 }
1813
1814 static int process_file_extent(struct btrfs_root *root,
1815                                 struct extent_buffer *eb,
1816                                 int slot, struct btrfs_key *key,
1817                                 struct shared_node *active_node)
1818 {
1819         struct inode_record *rec;
1820         struct btrfs_file_extent_item *fi;
1821         u64 num_bytes = 0;
1822         u64 disk_bytenr = 0;
1823         u64 extent_offset = 0;
1824         u64 mask = root->fs_info->sectorsize - 1;
1825         int extent_type;
1826         int ret;
1827
1828         rec = active_node->current;
1829         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1830         rec->found_file_extent = 1;
1831
1832         if (rec->extent_start == (u64)-1) {
1833                 rec->extent_start = key->offset;
1834                 rec->extent_end = key->offset;
1835         }
1836
1837         if (rec->extent_end > key->offset)
1838                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1839         else if (rec->extent_end < key->offset) {
1840                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1841                                            key->offset - rec->extent_end);
1842                 if (ret < 0)
1843                         return ret;
1844         }
1845
1846         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1847         extent_type = btrfs_file_extent_type(eb, fi);
1848
1849         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1850                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1851                 if (num_bytes == 0)
1852                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1853                 rec->found_size += num_bytes;
1854                 num_bytes = (num_bytes + mask) & ~mask;
1855         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1856                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1857                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1858                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1859                 extent_offset = btrfs_file_extent_offset(eb, fi);
1860                 if (num_bytes == 0 || (num_bytes & mask))
1861                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1862                 if (num_bytes + extent_offset >
1863                     btrfs_file_extent_ram_bytes(eb, fi))
1864                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1865                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1866                     (btrfs_file_extent_compression(eb, fi) ||
1867                      btrfs_file_extent_encryption(eb, fi) ||
1868                      btrfs_file_extent_other_encoding(eb, fi)))
1869                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1870                 if (disk_bytenr > 0)
1871                         rec->found_size += num_bytes;
1872         } else {
1873                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1874         }
1875         rec->extent_end = key->offset + num_bytes;
1876
1877         /*
1878          * The data reloc tree will copy full extents into its inode and then
1879          * copy the corresponding csums.  Because the extent it copied could be
1880          * a preallocated extent that hasn't been written to yet there may be no
1881          * csums to copy, ergo we won't have csums for our file extent.  This is
1882          * ok so just don't bother checking csums if the inode belongs to the
1883          * data reloc tree.
1884          */
1885         if (disk_bytenr > 0 &&
1886             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1887                 u64 found;
1888                 if (btrfs_file_extent_compression(eb, fi))
1889                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1890                 else
1891                         disk_bytenr += extent_offset;
1892
1893                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1894                 if (ret < 0)
1895                         return ret;
1896                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1897                         if (found > 0)
1898                                 rec->found_csum_item = 1;
1899                         if (found < num_bytes)
1900                                 rec->some_csum_missing = 1;
1901                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1902                         if (found > 0)
1903                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1904                 }
1905         }
1906         return 0;
1907 }
1908
1909 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1910                             struct walk_control *wc)
1911 {
1912         struct btrfs_key key;
1913         u32 nritems;
1914         int i;
1915         int ret = 0;
1916         struct cache_tree *inode_cache;
1917         struct shared_node *active_node;
1918
1919         if (wc->root_level == wc->active_node &&
1920             btrfs_root_refs(&root->root_item) == 0)
1921                 return 0;
1922
1923         active_node = wc->nodes[wc->active_node];
1924         inode_cache = &active_node->inode_cache;
1925         nritems = btrfs_header_nritems(eb);
1926         for (i = 0; i < nritems; i++) {
1927                 btrfs_item_key_to_cpu(eb, &key, i);
1928
1929                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1930                         continue;
1931                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1932                         continue;
1933
1934                 if (active_node->current == NULL ||
1935                     active_node->current->ino < key.objectid) {
1936                         if (active_node->current) {
1937                                 active_node->current->checked = 1;
1938                                 maybe_free_inode_rec(inode_cache,
1939                                                      active_node->current);
1940                         }
1941                         active_node->current = get_inode_rec(inode_cache,
1942                                                              key.objectid, 1);
1943                         BUG_ON(IS_ERR(active_node->current));
1944                 }
1945                 switch (key.type) {
1946                 case BTRFS_DIR_ITEM_KEY:
1947                 case BTRFS_DIR_INDEX_KEY:
1948                         ret = process_dir_item(eb, i, &key, active_node);
1949                         break;
1950                 case BTRFS_INODE_REF_KEY:
1951                         ret = process_inode_ref(eb, i, &key, active_node);
1952                         break;
1953                 case BTRFS_INODE_EXTREF_KEY:
1954                         ret = process_inode_extref(eb, i, &key, active_node);
1955                         break;
1956                 case BTRFS_INODE_ITEM_KEY:
1957                         ret = process_inode_item(eb, i, &key, active_node);
1958                         break;
1959                 case BTRFS_EXTENT_DATA_KEY:
1960                         ret = process_file_extent(root, eb, i, &key,
1961                                                   active_node);
1962                         break;
1963                 default:
1964                         break;
1965                 };
1966         }
1967         return ret;
1968 }
1969
/*
 * Per-level cache of the tree block most recently looked up while walking
 * a tree, so that the extent reference lookup does not have to be repeated
 * for a block that is visited again.  All arrays are indexed by tree level.
 */
struct node_refs {
	/* bytenr of the block the cached values of this level belong to */
	u64 bytenr[BTRFS_MAX_LEVEL];
	/* reference count reported by btrfs_lookup_extent_info() */
	u64 refs[BTRFS_MAX_LEVEL];
	/* non-zero if the current root has to check the block itself */
	int need_check[BTRFS_MAX_LEVEL];
};
1975
1976 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1977                              struct node_refs *nrefs, u64 level);
1978 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1979                             unsigned int ext_ref);
1980
1981 /*
1982  * Returns >0  Found error, not fatal, should continue
1983  * Returns <0  Fatal error, must exit the whole check
1984  * Returns 0   No errors found
1985  */
1986 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1987                                struct node_refs *nrefs, int *level, int ext_ref)
1988 {
1989         struct extent_buffer *cur = path->nodes[0];
1990         struct btrfs_key key;
1991         u64 cur_bytenr;
1992         u32 nritems;
1993         u64 first_ino = 0;
1994         int root_level = btrfs_header_level(root->node);
1995         int i;
1996         int ret = 0; /* Final return value */
1997         int err = 0; /* Positive error bitmap */
1998
1999         cur_bytenr = cur->start;
2000
2001         /* skip to first inode item or the first inode number change */
2002         nritems = btrfs_header_nritems(cur);
2003         for (i = 0; i < nritems; i++) {
2004                 btrfs_item_key_to_cpu(cur, &key, i);
2005                 if (i == 0)
2006                         first_ino = key.objectid;
2007                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2008                     (first_ino && first_ino != key.objectid))
2009                         break;
2010         }
2011         if (i == nritems) {
2012                 path->slots[0] = nritems;
2013                 return 0;
2014         }
2015         path->slots[0] = i;
2016
2017 again:
2018         err |= check_inode_item(root, path, ext_ref);
2019
2020         /* modify cur since check_inode_item may change path */
2021         cur = path->nodes[0];
2022
2023         if (err & LAST_ITEM)
2024                 goto out;
2025
2026         /* still have inode items in thie leaf */
2027         if (cur->start == cur_bytenr)
2028                 goto again;
2029
2030         /*
2031          * we have switched to another leaf, above nodes may
2032          * have changed, here walk down the path, if a node
2033          * or leaf is shared, check whether we can skip this
2034          * node or leaf.
2035          */
2036         for (i = root_level; i >= 0; i--) {
2037                 if (path->nodes[i]->start == nrefs->bytenr[i])
2038                         continue;
2039
2040                 ret = update_nodes_refs(root,
2041                                 path->nodes[i]->start,
2042                                 nrefs, i);
2043                 if (ret)
2044                         goto out;
2045
2046                 if (!nrefs->need_check[i]) {
2047                         *level += 1;
2048                         break;
2049                 }
2050         }
2051
2052         for (i = 0; i < *level; i++) {
2053                 free_extent_buffer(path->nodes[i]);
2054                 path->nodes[i] = NULL;
2055         }
2056 out:
2057         err &= ~LAST_ITEM;
2058         if (err && !ret)
2059                 ret = err;
2060         return ret;
2061 }
2062
2063 static void reada_walk_down(struct btrfs_root *root,
2064                             struct extent_buffer *node, int slot)
2065 {
2066         struct btrfs_fs_info *fs_info = root->fs_info;
2067         u64 bytenr;
2068         u64 ptr_gen;
2069         u32 nritems;
2070         int i;
2071         int level;
2072
2073         level = btrfs_header_level(node);
2074         if (level != 1)
2075                 return;
2076
2077         nritems = btrfs_header_nritems(node);
2078         for (i = slot; i < nritems; i++) {
2079                 bytenr = btrfs_node_blockptr(node, i);
2080                 ptr_gen = btrfs_node_ptr_generation(node, i);
2081                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2082         }
2083 }
2084
2085 /*
2086  * Check the child node/leaf by the following condition:
2087  * 1. the first item key of the node/leaf should be the same with the one
2088  *    in parent.
2089  * 2. block in parent node should match the child node/leaf.
2090  * 3. generation of parent node and child's header should be consistent.
2091  *
2092  * Or the child node/leaf pointed by the key in parent is not valid.
2093  *
2094  * We hope to check leaf owner too, but since subvol may share leaves,
2095  * which makes leaf owner check not so strong, key check should be
2096  * sufficient enough for that case.
2097  */
2098 static int check_child_node(struct extent_buffer *parent, int slot,
2099                             struct extent_buffer *child)
2100 {
2101         struct btrfs_key parent_key;
2102         struct btrfs_key child_key;
2103         int ret = 0;
2104
2105         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2106         if (btrfs_header_level(child) == 0)
2107                 btrfs_item_key_to_cpu(child, &child_key, 0);
2108         else
2109                 btrfs_node_key_to_cpu(child, &child_key, 0);
2110
2111         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2112                 ret = -EINVAL;
2113                 fprintf(stderr,
2114                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2115                         parent_key.objectid, parent_key.type, parent_key.offset,
2116                         child_key.objectid, child_key.type, child_key.offset);
2117         }
2118         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2119                 ret = -EINVAL;
2120                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2121                         btrfs_node_blockptr(parent, slot),
2122                         btrfs_header_bytenr(child));
2123         }
2124         if (btrfs_node_ptr_generation(parent, slot) !=
2125             btrfs_header_generation(child)) {
2126                 ret = -EINVAL;
2127                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2128                         btrfs_header_generation(child),
2129                         btrfs_node_ptr_generation(parent, slot));
2130         }
2131         return ret;
2132 }
2133
2134 /*
2135  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2136  * in every fs or file tree check. Here we find its all root ids, and only check
2137  * it in the fs or file tree which has the smallest root id.
2138  */
2139 static int need_check(struct btrfs_root *root, struct ulist *roots)
2140 {
2141         struct rb_node *node;
2142         struct ulist_node *u;
2143
2144         if (roots->nnodes == 1)
2145                 return 1;
2146
2147         node = rb_first(&roots->root);
2148         u = rb_entry(node, struct ulist_node, rb_node);
2149         /*
2150          * current root id is not smallest, we skip it and let it be checked
2151          * in the fs or file tree who hash the smallest root id.
2152          */
2153         if (root->objectid != u->val)
2154                 return 0;
2155
2156         return 1;
2157 }
2158
2159 /*
2160  * for a tree node or leaf, we record its reference count, so later if we still
2161  * process this node or leaf, don't need to compute its reference count again.
2162  */
2163 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2164                              struct node_refs *nrefs, u64 level)
2165 {
2166         int check, ret;
2167         u64 refs;
2168         struct ulist *roots;
2169
2170         if (nrefs->bytenr[level] != bytenr) {
2171                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2172                                        level, 1, &refs, NULL);
2173                 if (ret < 0)
2174                         return ret;
2175
2176                 nrefs->bytenr[level] = bytenr;
2177                 nrefs->refs[level] = refs;
2178                 if (refs > 1) {
2179                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2180                                                    0, &roots);
2181                         if (ret)
2182                                 return -EIO;
2183
2184                         check = need_check(root, roots);
2185                         ulist_free(roots);
2186                         nrefs->need_check[level] = check;
2187                 } else {
2188                         nrefs->need_check[level] = 1;
2189                 }
2190         }
2191
2192         return 0;
2193 }
2194
/*
 * Descend from path->nodes[*level] along path->slots[] until a leaf is
 * reached and processed with process_one_leaf(), or until the current
 * block has no slot left, or an error occurs.
 *
 * @nrefs caches the reference count of the last block looked up per level
 * and is updated as new blocks are visited.  @level is lowered as the walk
 * descends.
 *
 * On every exit the slot of the level the walk stopped at is set to the
 * number of items of that block, i.e. the block is marked as finished.
 *
 * Returns 0 on success, a negative error (lookup failure of the start
 * block, unreadable or inconsistent child block, failure reported by
 * process_one_leaf()), or the positive value returned by
 * enter_shared_node() for the start block.
 */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  struct walk_control *wc, int *level,
			  struct node_refs *nrefs)
{
	enum btrfs_tree_block_status status;
	u64 bytenr;
	u64 ptr_gen;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *next;
	struct extent_buffer *cur;
	int ret, err = 0;
	u64 refs;

	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);

	/* Reference count of the start block: from the cache if possible */
	if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
		refs = nrefs->refs[*level];
		ret = 0;
	} else {
		ret = btrfs_lookup_extent_info(NULL, root,
				       path->nodes[*level]->start,
				       *level, 1, &refs, NULL);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		nrefs->bytenr[*level] = path->nodes[*level]->start;
		nrefs->refs[*level] = refs;
	}

	if (refs > 1) {
		ret = enter_shared_node(root, path->nodes[*level]->start,
					refs, wc, *level);
		/*
		 * NOTE(review): a positive return presumably means the shared
		 * block needs no further walking here - confirm against
		 * enter_shared_node().  The value is handed to the caller.
		 */
		if (ret > 0) {
			err = ret;
			goto out;
		}
	}

	while (*level >= 0) {
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
		cur = path->nodes[*level];

		if (btrfs_header_level(cur) != *level)
			WARN_ON(1);

		/* All slots of this block are done */
		if (path->slots[*level] >= btrfs_header_nritems(cur))
			break;
		/* Reached a leaf: process its items and stop descending */
		if (*level == 0) {
			ret = process_one_leaf(root, cur, wc);
			if (ret < 0)
				err = ret;
			break;
		}
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);

		/* Reference count of the child, again preferring the cache */
		if (bytenr == nrefs->bytenr[*level - 1]) {
			refs = nrefs->refs[*level - 1];
		} else {
			ret = btrfs_lookup_extent_info(NULL, root, bytenr,
					*level - 1, 1, &refs, NULL);
			if (ret < 0) {
				/* Lookup failed: go on as if it is not shared */
				refs = 0;
			} else {
				nrefs->bytenr[*level - 1] = bytenr;
				nrefs->refs[*level - 1] = refs;
			}
		}

		if (refs > 1) {
			ret = enter_shared_node(root, bytenr, refs,
						wc, *level - 1);
			/* Positive return: skip this child, try the next slot */
			if (ret > 0) {
				path->slots[*level]++;
				continue;
			}
		}

		/* Use the cached block if it is up to date, else read it */
		next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
			free_extent_buffer(next);
			reada_walk_down(root, cur, path->slots[*level]);
			next = read_tree_block(root->fs_info, bytenr, ptr_gen);
			if (!extent_buffer_uptodate(next)) {
				struct btrfs_key node_key;

				/* Record the failed read before giving up */
				btrfs_node_key_to_cpu(path->nodes[*level],
						      &node_key,
						      path->slots[*level]);
				btrfs_add_corrupt_extent_record(root->fs_info,
						&node_key,
						path->nodes[*level]->start,
						root->fs_info->nodesize,
						*level);
				err = -EIO;
				goto out;
			}
		}

		/* The child must match what the parent pointer says */
		ret = check_child_node(cur, path->slots[*level], next);
		if (ret) {
			free_extent_buffer(next);
			err = ret;
			goto out;
		}

		if (btrfs_is_leaf(next))
			status = btrfs_check_leaf(root, NULL, next);
		else
			status = btrfs_check_node(root, NULL, next);
		if (status != BTRFS_TREE_BLOCK_CLEAN) {
			free_extent_buffer(next);
			err = -EIO;
			goto out;
		}

		/* Step down: the child replaces the block held at its level */
		*level = *level - 1;
		free_extent_buffer(path->nodes[*level]);
		path->nodes[*level] = next;
		path->slots[*level] = 0;
	}
out:
	/* Mark the block we stopped at as completely processed */
	path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
	return err;
}
2323
2324 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2325                             unsigned int ext_ref);
2326
2327 /*
2328  * Returns >0  Found error, should continue
2329  * Returns <0  Fatal error, must exit the whole check
2330  * Returns 0   No errors found
2331  */
2332 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2333                              int *level, struct node_refs *nrefs, int ext_ref)
2334 {
2335         enum btrfs_tree_block_status status;
2336         u64 bytenr;
2337         u64 ptr_gen;
2338         struct btrfs_fs_info *fs_info = root->fs_info;
2339         struct extent_buffer *next;
2340         struct extent_buffer *cur;
2341         int ret;
2342
2343         WARN_ON(*level < 0);
2344         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2345
2346         ret = update_nodes_refs(root, path->nodes[*level]->start,
2347                                 nrefs, *level);
2348         if (ret < 0)
2349                 return ret;
2350
2351         while (*level >= 0) {
2352                 WARN_ON(*level < 0);
2353                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2354                 cur = path->nodes[*level];
2355
2356                 if (btrfs_header_level(cur) != *level)
2357                         WARN_ON(1);
2358
2359                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2360                         break;
2361                 /* Don't forgot to check leaf/node validation */
2362                 if (*level == 0) {
2363                         ret = btrfs_check_leaf(root, NULL, cur);
2364                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2365                                 ret = -EIO;
2366                                 break;
2367                         }
2368                         ret = process_one_leaf_v2(root, path, nrefs,
2369                                                   level, ext_ref);
2370                         cur = path->nodes[*level];
2371                         break;
2372                 } else {
2373                         ret = btrfs_check_node(root, NULL, cur);
2374                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2375                                 ret = -EIO;
2376                                 break;
2377                         }
2378                 }
2379                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2380                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2381
2382                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2383                 if (ret)
2384                         break;
2385                 if (!nrefs->need_check[*level - 1]) {
2386                         path->slots[*level]++;
2387                         continue;
2388                 }
2389
2390                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2391                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2392                         free_extent_buffer(next);
2393                         reada_walk_down(root, cur, path->slots[*level]);
2394                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2395                         if (!extent_buffer_uptodate(next)) {
2396                                 struct btrfs_key node_key;
2397
2398                                 btrfs_node_key_to_cpu(path->nodes[*level],
2399                                                       &node_key,
2400                                                       path->slots[*level]);
2401                                 btrfs_add_corrupt_extent_record(fs_info,
2402                                                 &node_key,
2403                                                 path->nodes[*level]->start,
2404                                                 fs_info->nodesize,
2405                                                 *level);
2406                                 ret = -EIO;
2407                                 break;
2408                         }
2409                 }
2410
2411                 ret = check_child_node(cur, path->slots[*level], next);
2412                 if (ret < 0) 
2413                         break;
2414
2415                 if (btrfs_is_leaf(next))
2416                         status = btrfs_check_leaf(root, NULL, next);
2417                 else
2418                         status = btrfs_check_node(root, NULL, next);
2419                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2420                         free_extent_buffer(next);
2421                         ret = -EIO;
2422                         break;
2423                 }
2424
2425                 *level = *level - 1;
2426                 free_extent_buffer(path->nodes[*level]);
2427                 path->nodes[*level] = next;
2428                 path->slots[*level] = 0;
2429         }
2430         return ret;
2431 }
2432
2433 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2434                         struct walk_control *wc, int *level)
2435 {
2436         int i;
2437         struct extent_buffer *leaf;
2438
2439         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2440                 leaf = path->nodes[i];
2441                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2442                         path->slots[i]++;
2443                         *level = i;
2444                         return 0;
2445                 } else {
2446                         free_extent_buffer(path->nodes[*level]);
2447                         path->nodes[*level] = NULL;
2448                         BUG_ON(*level > wc->active_node);
2449                         if (*level == wc->active_node)
2450                                 leave_shared_node(root, wc, *level);
2451                         *level = i + 1;
2452                 }
2453         }
2454         return 1;
2455 }
2456
2457 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2458                            int *level)
2459 {
2460         int i;
2461         struct extent_buffer *leaf;
2462
2463         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2464                 leaf = path->nodes[i];
2465                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2466                         path->slots[i]++;
2467                         *level = i;
2468                         return 0;
2469                 } else {
2470                         free_extent_buffer(path->nodes[*level]);
2471                         path->nodes[*level] = NULL;
2472                         *level = i + 1;
2473                 }
2474         }
2475         return 1;
2476 }
2477
2478 static int check_root_dir(struct inode_record *rec)
2479 {
2480         struct inode_backref *backref;
2481         int ret = -1;
2482
2483         if (!rec->found_inode_item || rec->errors)
2484                 goto out;
2485         if (rec->nlink != 1 || rec->found_link != 0)
2486                 goto out;
2487         if (list_empty(&rec->backrefs))
2488                 goto out;
2489         backref = to_inode_backref(rec->backrefs.next);
2490         if (!backref->found_inode_ref)
2491                 goto out;
2492         if (backref->index != 0 || backref->namelen != 2 ||
2493             memcmp(backref->name, "..", 2))
2494                 goto out;
2495         if (backref->found_dir_index || backref->found_dir_item)
2496                 goto out;
2497         ret = 0;
2498 out:
2499         return ret;
2500 }
2501
2502 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2503                               struct btrfs_root *root, struct btrfs_path *path,
2504                               struct inode_record *rec)
2505 {
2506         struct btrfs_inode_item *ei;
2507         struct btrfs_key key;
2508         int ret;
2509
2510         key.objectid = rec->ino;
2511         key.type = BTRFS_INODE_ITEM_KEY;
2512         key.offset = (u64)-1;
2513
2514         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2515         if (ret < 0)
2516                 goto out;
2517         if (ret) {
2518                 if (!path->slots[0]) {
2519                         ret = -ENOENT;
2520                         goto out;
2521                 }
2522                 path->slots[0]--;
2523                 ret = 0;
2524         }
2525         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2526         if (key.objectid != rec->ino) {
2527                 ret = -ENOENT;
2528                 goto out;
2529         }
2530
2531         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2532                             struct btrfs_inode_item);
2533         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2534         btrfs_mark_buffer_dirty(path->nodes[0]);
2535         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2536         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2537                root->root_key.objectid);
2538 out:
2539         btrfs_release_path(path);
2540         return ret;
2541 }
2542
2543 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2544                                     struct btrfs_root *root,
2545                                     struct btrfs_path *path,
2546                                     struct inode_record *rec)
2547 {
2548         int ret;
2549
2550         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2551         btrfs_release_path(path);
2552         if (!ret)
2553                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2554         return ret;
2555 }
2556
2557 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2558                                struct btrfs_root *root,
2559                                struct btrfs_path *path,
2560                                struct inode_record *rec)
2561 {
2562         struct btrfs_inode_item *ei;
2563         struct btrfs_key key;
2564         int ret = 0;
2565
2566         key.objectid = rec->ino;
2567         key.type = BTRFS_INODE_ITEM_KEY;
2568         key.offset = 0;
2569
2570         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2571         if (ret) {
2572                 if (ret > 0)
2573                         ret = -ENOENT;
2574                 goto out;
2575         }
2576
2577         /* Since ret == 0, no need to check anything */
2578         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2579                             struct btrfs_inode_item);
2580         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2581         btrfs_mark_buffer_dirty(path->nodes[0]);
2582         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2583         printf("reset nbytes for ino %llu root %llu\n",
2584                rec->ino, root->root_key.objectid);
2585 out:
2586         btrfs_release_path(path);
2587         return ret;
2588 }
2589
2590 static int add_missing_dir_index(struct btrfs_root *root,
2591                                  struct cache_tree *inode_cache,
2592                                  struct inode_record *rec,
2593                                  struct inode_backref *backref)
2594 {
2595         struct btrfs_path path;
2596         struct btrfs_trans_handle *trans;
2597         struct btrfs_dir_item *dir_item;
2598         struct extent_buffer *leaf;
2599         struct btrfs_key key;
2600         struct btrfs_disk_key disk_key;
2601         struct inode_record *dir_rec;
2602         unsigned long name_ptr;
2603         u32 data_size = sizeof(*dir_item) + backref->namelen;
2604         int ret;
2605
2606         trans = btrfs_start_transaction(root, 1);
2607         if (IS_ERR(trans))
2608                 return PTR_ERR(trans);
2609
2610         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2611                 (unsigned long long)rec->ino);
2612
2613         btrfs_init_path(&path);
2614         key.objectid = backref->dir;
2615         key.type = BTRFS_DIR_INDEX_KEY;
2616         key.offset = backref->index;
2617         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2618         BUG_ON(ret);
2619
2620         leaf = path.nodes[0];
2621         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2622
2623         disk_key.objectid = cpu_to_le64(rec->ino);
2624         disk_key.type = BTRFS_INODE_ITEM_KEY;
2625         disk_key.offset = 0;
2626
2627         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2628         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2629         btrfs_set_dir_data_len(leaf, dir_item, 0);
2630         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2631         name_ptr = (unsigned long)(dir_item + 1);
2632         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2633         btrfs_mark_buffer_dirty(leaf);
2634         btrfs_release_path(&path);
2635         btrfs_commit_transaction(trans, root);
2636
2637         backref->found_dir_index = 1;
2638         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2639         BUG_ON(IS_ERR(dir_rec));
2640         if (!dir_rec)
2641                 return 0;
2642         dir_rec->found_size += backref->namelen;
2643         if (dir_rec->found_size == dir_rec->isize &&
2644             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2645                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2646         if (dir_rec->found_size != dir_rec->isize)
2647                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2648
2649         return 0;
2650 }
2651
2652 static int delete_dir_index(struct btrfs_root *root,
2653                             struct inode_backref *backref)
2654 {
2655         struct btrfs_trans_handle *trans;
2656         struct btrfs_dir_item *di;
2657         struct btrfs_path path;
2658         int ret = 0;
2659
2660         trans = btrfs_start_transaction(root, 1);
2661         if (IS_ERR(trans))
2662                 return PTR_ERR(trans);
2663
2664         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2665                 (unsigned long long)backref->dir,
2666                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2667                 (unsigned long long)root->objectid);
2668
2669         btrfs_init_path(&path);
2670         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2671                                     backref->name, backref->namelen,
2672                                     backref->index, -1);
2673         if (IS_ERR(di)) {
2674                 ret = PTR_ERR(di);
2675                 btrfs_release_path(&path);
2676                 btrfs_commit_transaction(trans, root);
2677                 if (ret == -ENOENT)
2678                         return 0;
2679                 return ret;
2680         }
2681
2682         if (!di)
2683                 ret = btrfs_del_item(trans, root, &path);
2684         else
2685                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2686         BUG_ON(ret);
2687         btrfs_release_path(&path);
2688         btrfs_commit_transaction(trans, root);
2689         return ret;
2690 }
2691
2692 static int create_inode_item(struct btrfs_root *root,
2693                              struct inode_record *rec,
2694                              int root_dir)
2695 {
2696         struct btrfs_trans_handle *trans;
2697         struct btrfs_inode_item inode_item;
2698         time_t now = time(NULL);
2699         int ret;
2700
2701         trans = btrfs_start_transaction(root, 1);
2702         if (IS_ERR(trans)) {
2703                 ret = PTR_ERR(trans);
2704                 return ret;
2705         }
2706
2707         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2708                 "be incomplete, please check permissions and content after "
2709                 "the fsck completes.\n", (unsigned long long)root->objectid,
2710                 (unsigned long long)rec->ino);
2711
2712         memset(&inode_item, 0, sizeof(inode_item));
2713         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2714         if (root_dir)
2715                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2716         else
2717                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2718         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2719         if (rec->found_dir_item) {
2720                 if (rec->found_file_extent)
2721                         fprintf(stderr, "root %llu inode %llu has both a dir "
2722                                 "item and extents, unsure if it is a dir or a "
2723                                 "regular file so setting it as a directory\n",
2724                                 (unsigned long long)root->objectid,
2725                                 (unsigned long long)rec->ino);
2726                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2727                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2728         } else if (!rec->found_dir_item) {
2729                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2730                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2731         }
2732         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2733         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2734         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2735         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2736         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2737         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2738         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2739         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2740
2741         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2742         BUG_ON(ret);
2743         btrfs_commit_transaction(trans, root);
2744         return 0;
2745 }
2746
/*
 * Walk the backrefs of @rec and repair inconsistencies between inode refs,
 * dir items, dir indexes and the inode item.
 *
 * @delete selects what this call does: when non-zero only bad dir index
 * items are deleted; when zero only missing items are added (dir index,
 * dir index/item pair, inode item).
 *
 * Return the non-zero result of a failed helper, otherwise the number of
 * repairs that were made (possibly 0).
 */
static int repair_inode_backrefs(struct btrfs_root *root,
                                 struct inode_record *rec,
                                 struct cache_tree *inode_cache,
                                 int delete)
{
        struct inode_backref *tmp, *backref;
        u64 root_dirid = btrfs_root_dirid(&root->root_item);
        int ret = 0;
        int repaired = 0;

        /* _safe iteration: backrefs may be unlinked and freed in the body */
        list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
                /*
                 * Root directory without an inode item: recreate it.
                 * NOTE(review): rec->found_inode_item is not updated in this
                 * function, so with several backrefs this branch can run
                 * more than once - confirm that is intended.
                 */
                if (!delete && rec->ino == root_dirid) {
                        if (!rec->found_inode_item) {
                                ret = create_inode_item(root, rec, 1);
                                if (ret)
                                        break;
                                repaired++;
                        }
                }

                /* Index 0 for root dir's are special, don't mess with it */
                if (rec->ino == root_dirid && backref->index == 0)
                        continue;

                /*
                 * Delete pass: drop a dir index that has no inode ref, or
                 * whose index does not match the inode ref.
                 */
                if (delete &&
                    ((backref->found_dir_index && !backref->found_inode_ref) ||
                     (backref->found_dir_index && backref->found_inode_ref &&
                      (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
                        ret = delete_dir_index(root, backref);
                        if (ret)
                                break;
                        repaired++;
                        list_del(&backref->list);
                        free(backref);
                        continue;
                }

                /* Dir item and inode ref exist, only the dir index is missing */
                if (!delete && !backref->found_dir_index &&
                    backref->found_dir_item && backref->found_inode_ref) {
                        ret = add_missing_dir_index(root, inode_cache, rec,
                                                    backref);
                        if (ret)
                                break;
                        repaired++;
                        /* A backref that is now complete and error free is dropped */
                        if (backref->found_dir_item &&
                            backref->found_dir_index) {
                                if (!backref->errors &&
                                    backref->found_inode_ref) {
                                        list_del(&backref->list);
                                        free(backref);
                                        continue;
                                }
                        }
                }

                /* Only the inode ref exists: add the dir index/item pair */
                if (!delete && (!backref->found_dir_index &&
                                !backref->found_dir_item &&
                                backref->found_inode_ref)) {
                        struct btrfs_trans_handle *trans;
                        struct btrfs_key location;

                        ret = check_dir_conflict(root, backref->name,
                                                 backref->namelen,
                                                 backref->dir,
                                                 backref->index);
                        if (ret) {
                                /*
                                 * let nlink fixing routine to handle it,
                                 * which can do it better.
                                 */
                                ret = 0;
                                break;
                        }
                        location.objectid = rec->ino;
                        location.type = BTRFS_INODE_ITEM_KEY;
                        location.offset = 0;

                        trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
                                ret = PTR_ERR(trans);
                                break;
                        }
                        fprintf(stderr, "adding missing dir index/item pair "
                                "for inode %llu\n",
                                (unsigned long long)rec->ino);
                        ret = btrfs_insert_dir_item(trans, root, backref->name,
                                                    backref->namelen,
                                                    backref->dir, &location,
                                                    imode_to_type(rec->imode),
                                                    backref->index);
                        BUG_ON(ret);
                        btrfs_commit_transaction(trans, root);
                        repaired++;
                }

                /* All three references agree but the inode item is missing */
                if (!delete && (backref->found_inode_ref &&
                                backref->found_dir_index &&
                                backref->found_dir_item &&
                                !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
                                !rec->found_inode_item)) {
                        ret = create_inode_item(root, rec, 0);
                        if (ret)
                                break;
                        repaired++;
                }

        }
        /* An error takes precedence over the repair count */
        return ret ? ret : repaired;
}
2856
2857 /*
2858  * To determine the file type for nlink/inode_item repair
2859  *
2860  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2861  * Return -ENOENT if file type is not found.
2862  */
2863 static int find_file_type(struct inode_record *rec, u8 *type)
2864 {
2865         struct inode_backref *backref;
2866
2867         /* For inode item recovered case */
2868         if (rec->found_inode_item) {
2869                 *type = imode_to_type(rec->imode);
2870                 return 0;
2871         }
2872
2873         list_for_each_entry(backref, &rec->backrefs, list) {
2874                 if (backref->found_dir_index || backref->found_dir_item) {
2875                         *type = backref->filetype;
2876                         return 0;
2877                 }
2878         }
2879         return -ENOENT;
2880 }
2881
2882 /*
2883  * To determine the file name for nlink repair
2884  *
2885  * Return 0 if file name is found, set name and namelen.
2886  * Return -ENOENT if file name is not found.
2887  */
2888 static int find_file_name(struct inode_record *rec,
2889                           char *name, int *namelen)
2890 {
2891         struct inode_backref *backref;
2892
2893         list_for_each_entry(backref, &rec->backrefs, list) {
2894                 if (backref->found_dir_index || backref->found_dir_item ||
2895                     backref->found_inode_ref) {
2896                         memcpy(name, backref->name, backref->namelen);
2897                         *namelen = backref->namelen;
2898                         return 0;
2899                 }
2900         }
2901         return -ENOENT;
2902 }
2903
2904 /* Reset the nlink of the inode to the correct one */
2905 static int reset_nlink(struct btrfs_trans_handle *trans,
2906                        struct btrfs_root *root,
2907                        struct btrfs_path *path,
2908                        struct inode_record *rec)
2909 {
2910         struct inode_backref *backref;
2911         struct inode_backref *tmp;
2912         struct btrfs_key key;
2913         struct btrfs_inode_item *inode_item;
2914         int ret = 0;
2915
2916         /* We don't believe this either, reset it and iterate backref */
2917         rec->found_link = 0;
2918
2919         /* Remove all backref including the valid ones */
2920         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2921                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2922                                    backref->index, backref->name,
2923                                    backref->namelen, 0);
2924                 if (ret < 0)
2925                         goto out;
2926
2927                 /* remove invalid backref, so it won't be added back */
2928                 if (!(backref->found_dir_index &&
2929                       backref->found_dir_item &&
2930                       backref->found_inode_ref)) {
2931                         list_del(&backref->list);
2932                         free(backref);
2933                 } else {
2934                         rec->found_link++;
2935                 }
2936         }
2937
2938         /* Set nlink to 0 */
2939         key.objectid = rec->ino;
2940         key.type = BTRFS_INODE_ITEM_KEY;
2941         key.offset = 0;
2942         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2943         if (ret < 0)
2944                 goto out;
2945         if (ret > 0) {
2946                 ret = -ENOENT;
2947                 goto out;
2948         }
2949         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2950                                     struct btrfs_inode_item);
2951         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2952         btrfs_mark_buffer_dirty(path->nodes[0]);
2953         btrfs_release_path(path);
2954
2955         /*
2956          * Add back valid inode_ref/dir_item/dir_index,
2957          * add_link() will handle the nlink inc, so new nlink must be correct
2958          */
2959         list_for_each_entry(backref, &rec->backrefs, list) {
2960                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2961                                      backref->name, backref->namelen,
2962                                      backref->filetype, &backref->index, 1);
2963                 if (ret < 0)
2964                         goto out;
2965         }
2966 out:
2967         btrfs_release_path(path);
2968         return ret;
2969 }
2970
2971 static int get_highest_inode(struct btrfs_trans_handle *trans,
2972                                 struct btrfs_root *root,
2973                                 struct btrfs_path *path,
2974                                 u64 *highest_ino)
2975 {
2976         struct btrfs_key key, found_key;
2977         int ret;
2978
2979         btrfs_init_path(path);
2980         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2981         key.offset = -1;
2982         key.type = BTRFS_INODE_ITEM_KEY;
2983         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2984         if (ret == 1) {
2985                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2986                                 path->slots[0] - 1);
2987                 *highest_ino = found_key.objectid;
2988                 ret = 0;
2989         }
2990         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2991                 ret = -EOVERFLOW;
2992         btrfs_release_path(path);
2993         return ret;
2994 }
2995
2996 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2997                                struct btrfs_root *root,
2998                                struct btrfs_path *path,
2999                                struct inode_record *rec)
3000 {
3001         char *dir_name = "lost+found";
3002         char namebuf[BTRFS_NAME_LEN] = {0};
3003         u64 lost_found_ino;
3004         u32 mode = 0700;
3005         u8 type = 0;
3006         int namelen = 0;
3007         int name_recovered = 0;
3008         int type_recovered = 0;
3009         int ret = 0;
3010
3011         /*
3012          * Get file name and type first before these invalid inode ref
3013          * are deleted by remove_all_invalid_backref()
3014          */
3015         name_recovered = !find_file_name(rec, namebuf, &namelen);
3016         type_recovered = !find_file_type(rec, &type);
3017
3018         if (!name_recovered) {
3019                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3020                        rec->ino, rec->ino);
3021                 namelen = count_digits(rec->ino);
3022                 sprintf(namebuf, "%llu", rec->ino);
3023                 name_recovered = 1;
3024         }
3025         if (!type_recovered) {
3026                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3027                        rec->ino);
3028                 type = BTRFS_FT_REG_FILE;
3029                 type_recovered = 1;
3030         }
3031
3032         ret = reset_nlink(trans, root, path, rec);
3033         if (ret < 0) {
3034                 fprintf(stderr,
3035                         "Failed to reset nlink for inode %llu: %s\n",
3036                         rec->ino, strerror(-ret));
3037                 goto out;
3038         }
3039
3040         if (rec->found_link == 0) {
3041                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3042                 if (ret < 0)
3043                         goto out;
3044                 lost_found_ino++;
3045                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3046                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3047                                   mode);
3048                 if (ret < 0) {
3049                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
3050                                 dir_name, strerror(-ret));
3051                         goto out;
3052                 }
3053                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3054                                      namebuf, namelen, type, NULL, 1);
3055                 /*
3056                  * Add ".INO" suffix several times to handle case where
3057                  * "FILENAME.INO" is already taken by another file.
3058                  */
3059                 while (ret == -EEXIST) {
3060                         /*
3061                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
3062                          */
3063                         if (namelen + count_digits(rec->ino) + 1 >
3064                             BTRFS_NAME_LEN) {
3065                                 ret = -EFBIG;
3066                                 goto out;
3067                         }
3068                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3069                                  ".%llu", rec->ino);
3070                         namelen += count_digits(rec->ino) + 1;
3071                         ret = btrfs_add_link(trans, root, rec->ino,
3072                                              lost_found_ino, namebuf,
3073                                              namelen, type, NULL, 1);
3074                 }
3075                 if (ret < 0) {
3076                         fprintf(stderr,
3077                                 "Failed to link the inode %llu to %s dir: %s\n",
3078                                 rec->ino, dir_name, strerror(-ret));
3079                         goto out;
3080                 }
3081                 /*
3082                  * Just increase the found_link, don't actually add the
3083                  * backref. This will make things easier and this inode
3084                  * record will be freed after the repair is done.
3085                  * So fsck will not report problem about this inode.
3086                  */
3087                 rec->found_link++;
3088                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3089                        namelen, namebuf, dir_name);
3090         }
3091         printf("Fixed the nlink of inode %llu\n", rec->ino);
3092 out:
3093         /*
3094          * Clear the flag anyway, or we will loop forever for the same inode
3095          * as it will not be removed from the bad inode list and the dead loop
3096          * happens.
3097          */
3098         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3099         btrfs_release_path(path);
3100         return ret;
3101 }
3102
3103 /*
3104  * Check if there is any normal(reg or prealloc) file extent for given
3105  * ino.
3106  * This is used to determine the file type when neither its dir_index/item or
3107  * inode_item exists.
3108  *
3109  * This will *NOT* report error, if any error happens, just consider it does
3110  * not have any normal file extent.
3111  */
3112 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3113 {
3114         struct btrfs_path path;
3115         struct btrfs_key key;
3116         struct btrfs_key found_key;
3117         struct btrfs_file_extent_item *fi;
3118         u8 type;
3119         int ret = 0;
3120
3121         btrfs_init_path(&path);
3122         key.objectid = ino;
3123         key.type = BTRFS_EXTENT_DATA_KEY;
3124         key.offset = 0;
3125
3126         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3127         if (ret < 0) {
3128                 ret = 0;
3129                 goto out;
3130         }
3131         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3132                 ret = btrfs_next_leaf(root, &path);
3133                 if (ret) {
3134                         ret = 0;
3135                         goto out;
3136                 }
3137         }
3138         while (1) {
3139                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3140                                       path.slots[0]);
3141                 if (found_key.objectid != ino ||
3142                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3143                         break;
3144                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3145                                     struct btrfs_file_extent_item);
3146                 type = btrfs_file_extent_type(path.nodes[0], fi);
3147                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3148                         ret = 1;
3149                         goto out;
3150                 }
3151         }
3152 out:
3153         btrfs_release_path(&path);
3154         return ret;
3155 }
3156
3157 static u32 btrfs_type_to_imode(u8 type)
3158 {
3159         static u32 imode_by_btrfs_type[] = {
3160                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3161                 [BTRFS_FT_DIR]          = S_IFDIR,
3162                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3163                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3164                 [BTRFS_FT_FIFO]         = S_IFIFO,
3165                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3166                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3167         };
3168
3169         return imode_by_btrfs_type[(type)];
3170 }
3171
3172 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3173                                 struct btrfs_root *root,
3174                                 struct btrfs_path *path,
3175                                 struct inode_record *rec)
3176 {
3177         u8 filetype;
3178         u32 mode = 0700;
3179         int type_recovered = 0;
3180         int ret = 0;
3181
3182         printf("Trying to rebuild inode:%llu\n", rec->ino);
3183
3184         type_recovered = !find_file_type(rec, &filetype);
3185
3186         /*
3187          * Try to determine inode type if type not found.
3188          *
3189          * For found regular file extent, it must be FILE.
3190          * For found dir_item/index, it must be DIR.
3191          *
3192          * For undetermined one, use FILE as fallback.
3193          *
3194          * TODO:
3195          * 1. If found backref(inode_index/item is already handled) to it,
3196          *    it must be DIR.
3197          *    Need new inode-inode ref structure to allow search for that.
3198          */
3199         if (!type_recovered) {
3200                 if (rec->found_file_extent &&
3201                     find_normal_file_extent(root, rec->ino)) {
3202                         type_recovered = 1;
3203                         filetype = BTRFS_FT_REG_FILE;
3204                 } else if (rec->found_dir_item) {
3205                         type_recovered = 1;
3206                         filetype = BTRFS_FT_DIR;
3207                 } else if (!list_empty(&rec->orphan_extents)) {
3208                         type_recovered = 1;
3209                         filetype = BTRFS_FT_REG_FILE;
3210                 } else{
3211                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3212                                rec->ino);
3213                         type_recovered = 1;
3214                         filetype = BTRFS_FT_REG_FILE;
3215                 }
3216         }
3217
3218         ret = btrfs_new_inode(trans, root, rec->ino,
3219                               mode | btrfs_type_to_imode(filetype));
3220         if (ret < 0)
3221                 goto out;
3222
3223         /*
3224          * Here inode rebuild is done, we only rebuild the inode item,
3225          * don't repair the nlink(like move to lost+found).
3226          * That is the job of nlink repair.
3227          *
3228          * We just fill the record and return
3229          */
3230         rec->found_dir_item = 1;
3231         rec->imode = mode | btrfs_type_to_imode(filetype);
3232         rec->nlink = 0;
3233         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3234         /* Ensure the inode_nlinks repair function will be called */
3235         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3236 out:
3237         return ret;
3238 }
3239
3240 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3241                                       struct btrfs_root *root,
3242                                       struct btrfs_path *path,
3243                                       struct inode_record *rec)
3244 {
3245         struct orphan_data_extent *orphan;
3246         struct orphan_data_extent *tmp;
3247         int ret = 0;
3248
3249         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3250                 /*
3251                  * Check for conflicting file extents
3252                  *
3253                  * Here we don't know whether the extents is compressed or not,
3254                  * so we can only assume it not compressed nor data offset,
3255                  * and use its disk_len as extent length.
3256                  */
3257                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3258                                        orphan->offset, orphan->disk_len, 0);
3259                 btrfs_release_path(path);
3260                 if (ret < 0)
3261                         goto out;
3262                 if (!ret) {
3263                         fprintf(stderr,
3264                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3265                                 orphan->disk_bytenr, orphan->disk_len);
3266                         ret = btrfs_free_extent(trans,
3267                                         root->fs_info->extent_root,
3268                                         orphan->disk_bytenr, orphan->disk_len,
3269                                         0, root->objectid, orphan->objectid,
3270                                         orphan->offset);
3271                         if (ret < 0)
3272                                 goto out;
3273                 }
3274                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3275                                 orphan->offset, orphan->disk_bytenr,
3276                                 orphan->disk_len, orphan->disk_len);
3277                 if (ret < 0)
3278                         goto out;
3279
3280                 /* Update file size info */
3281                 rec->found_size += orphan->disk_len;
3282                 if (rec->found_size == rec->nbytes)
3283                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3284
3285                 /* Update the file extent hole info too */
3286                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3287                                            orphan->disk_len);
3288                 if (ret < 0)
3289                         goto out;
3290                 if (RB_EMPTY_ROOT(&rec->holes))
3291                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3292
3293                 list_del(&orphan->list);
3294                 free(orphan);
3295         }
3296         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3297 out:
3298         return ret;
3299 }
3300
3301 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3302                                         struct btrfs_root *root,
3303                                         struct btrfs_path *path,
3304                                         struct inode_record *rec)
3305 {
3306         struct rb_node *node;
3307         struct file_extent_hole *hole;
3308         int found = 0;
3309         int ret = 0;
3310
3311         node = rb_first(&rec->holes);
3312
3313         while (node) {
3314                 found = 1;
3315                 hole = rb_entry(node, struct file_extent_hole, node);
3316                 ret = btrfs_punch_hole(trans, root, rec->ino,
3317                                        hole->start, hole->len);
3318                 if (ret < 0)
3319                         goto out;
3320                 ret = del_file_extent_hole(&rec->holes, hole->start,
3321                                            hole->len);
3322                 if (ret < 0)
3323                         goto out;
3324                 if (RB_EMPTY_ROOT(&rec->holes))
3325                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3326                 node = rb_first(&rec->holes);
3327         }
3328         /* special case for a file losing all its file extent */
3329         if (!found) {
3330                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3331                                        round_up(rec->isize,
3332                                                 root->fs_info->sectorsize));
3333                 if (ret < 0)
3334                         goto out;
3335         }
3336         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3337                rec->ino, root->objectid);
3338 out:
3339         return ret;
3340 }
3341
3342 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3343 {
3344         struct btrfs_trans_handle *trans;
3345         struct btrfs_path path;
3346         int ret = 0;
3347
3348         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3349                              I_ERR_NO_ORPHAN_ITEM |
3350                              I_ERR_LINK_COUNT_WRONG |
3351                              I_ERR_NO_INODE_ITEM |
3352                              I_ERR_FILE_EXTENT_ORPHAN |
3353                              I_ERR_FILE_EXTENT_DISCOUNT|
3354                              I_ERR_FILE_NBYTES_WRONG)))
3355                 return rec->errors;
3356
3357         /*
3358          * For nlink repair, it may create a dir and add link, so
3359          * 2 for parent(256)'s dir_index and dir_item
3360          * 2 for lost+found dir's inode_item and inode_ref
3361          * 1 for the new inode_ref of the file
3362          * 2 for lost+found dir's dir_index and dir_item for the file
3363          */
3364         trans = btrfs_start_transaction(root, 7);
3365         if (IS_ERR(trans))
3366                 return PTR_ERR(trans);
3367
3368         btrfs_init_path(&path);
3369         if (rec->errors & I_ERR_NO_INODE_ITEM)
3370                 ret = repair_inode_no_item(trans, root, &path, rec);
3371         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3372                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3373         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3374                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3375         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3376                 ret = repair_inode_isize(trans, root, &path, rec);
3377         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3378                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3379         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3380                 ret = repair_inode_nlinks(trans, root, &path, rec);
3381         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3382                 ret = repair_inode_nbytes(trans, root, &path, rec);
3383         btrfs_commit_transaction(trans, root);
3384         btrfs_release_path(&path);
3385         return ret;
3386 }
3387
3388 static int check_inode_recs(struct btrfs_root *root,
3389                             struct cache_tree *inode_cache)
3390 {
3391         struct cache_extent *cache;
3392         struct ptr_node *node;
3393         struct inode_record *rec;
3394         struct inode_backref *backref;
3395         int stage = 0;
3396         int ret = 0;
3397         int err = 0;
3398         u64 error = 0;
3399         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3400
3401         if (btrfs_root_refs(&root->root_item) == 0) {
3402                 if (!cache_tree_empty(inode_cache))
3403                         fprintf(stderr, "warning line %d\n", __LINE__);
3404                 return 0;
3405         }
3406
3407         /*
3408          * We need to repair backrefs first because we could change some of the
3409          * errors in the inode recs.
3410          *
3411          * We also need to go through and delete invalid backrefs first and then
3412          * add the correct ones second.  We do this because we may get EEXIST
3413          * when adding back the correct index because we hadn't yet deleted the
3414          * invalid index.
3415          *
3416          * For example, if we were missing a dir index then the directories
3417          * isize would be wrong, so if we fixed the isize to what we thought it
3418          * would be and then fixed the backref we'd still have a invalid fs, so
3419          * we need to add back the dir index and then check to see if the isize
3420          * is still wrong.
3421          */
3422         while (stage < 3) {
3423                 stage++;
3424                 if (stage == 3 && !err)
3425                         break;
3426
3427                 cache = search_cache_extent(inode_cache, 0);
3428                 while (repair && cache) {
3429                         node = container_of(cache, struct ptr_node, cache);
3430                         rec = node->data;
3431                         cache = next_cache_extent(cache);
3432
3433                         /* Need to free everything up and rescan */
3434                         if (stage == 3) {
3435                                 remove_cache_extent(inode_cache, &node->cache);
3436                                 free(node);
3437                                 free_inode_rec(rec);
3438                                 continue;
3439                         }
3440
3441                         if (list_empty(&rec->backrefs))
3442                                 continue;
3443
3444                         ret = repair_inode_backrefs(root, rec, inode_cache,
3445                                                     stage == 1);
3446                         if (ret < 0) {
3447                                 err = ret;
3448                                 stage = 2;
3449                                 break;
3450                         } if (ret > 0) {
3451                                 err = -EAGAIN;
3452                         }
3453                 }
3454         }
3455         if (err)
3456                 return err;
3457
3458         rec = get_inode_rec(inode_cache, root_dirid, 0);
3459         BUG_ON(IS_ERR(rec));
3460         if (rec) {
3461                 ret = check_root_dir(rec);
3462                 if (ret) {
3463                         fprintf(stderr, "root %llu root dir %llu error\n",
3464                                 (unsigned long long)root->root_key.objectid,
3465                                 (unsigned long long)root_dirid);
3466                         print_inode_error(root, rec);
3467                         error++;
3468                 }
3469         } else {
3470                 if (repair) {
3471                         struct btrfs_trans_handle *trans;
3472
3473                         trans = btrfs_start_transaction(root, 1);
3474                         if (IS_ERR(trans)) {
3475                                 err = PTR_ERR(trans);
3476                                 return err;
3477                         }
3478
3479                         fprintf(stderr,
3480                                 "root %llu missing its root dir, recreating\n",
3481                                 (unsigned long long)root->objectid);
3482
3483                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3484                         BUG_ON(ret);
3485
3486                         btrfs_commit_transaction(trans, root);
3487                         return -EAGAIN;
3488                 }
3489
3490                 fprintf(stderr, "root %llu root dir %llu not found\n",
3491                         (unsigned long long)root->root_key.objectid,
3492                         (unsigned long long)root_dirid);
3493         }
3494
3495         while (1) {
3496                 cache = search_cache_extent(inode_cache, 0);
3497                 if (!cache)
3498                         break;
3499                 node = container_of(cache, struct ptr_node, cache);
3500                 rec = node->data;
3501                 remove_cache_extent(inode_cache, &node->cache);
3502                 free(node);
3503                 if (rec->ino == root_dirid ||
3504                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3505                         free_inode_rec(rec);
3506                         continue;
3507                 }
3508
3509                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3510                         ret = check_orphan_item(root, rec->ino);
3511                         if (ret == 0)
3512                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3513                         if (can_free_inode_rec(rec)) {
3514                                 free_inode_rec(rec);
3515                                 continue;
3516                         }
3517                 }
3518
3519                 if (!rec->found_inode_item)
3520                         rec->errors |= I_ERR_NO_INODE_ITEM;
3521                 if (rec->found_link != rec->nlink)
3522                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3523                 if (repair) {
3524                         ret = try_repair_inode(root, rec);
3525                         if (ret == 0 && can_free_inode_rec(rec)) {
3526                                 free_inode_rec(rec);
3527                                 continue;
3528                         }
3529                         ret = 0;
3530                 }
3531
3532                 if (!(repair && ret == 0))
3533                         error++;
3534                 print_inode_error(root, rec);
3535                 list_for_each_entry(backref, &rec->backrefs, list) {
3536                         if (!backref->found_dir_item)
3537                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3538                         if (!backref->found_dir_index)
3539                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3540                         if (!backref->found_inode_ref)
3541                                 backref->errors |= REF_ERR_NO_INODE_REF;
3542                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3543                                 " namelen %u name %s filetype %d errors %x",
3544                                 (unsigned long long)backref->dir,
3545                                 (unsigned long long)backref->index,
3546                                 backref->namelen, backref->name,
3547                                 backref->filetype, backref->errors);
3548                         print_ref_error(backref->errors);
3549                 }
3550                 free_inode_rec(rec);
3551         }
3552         return (error > 0) ? -1 : 0;
3553 }
3554
3555 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3556                                         u64 objectid)
3557 {
3558         struct cache_extent *cache;
3559         struct root_record *rec = NULL;
3560         int ret;
3561
3562         cache = lookup_cache_extent(root_cache, objectid, 1);
3563         if (cache) {
3564                 rec = container_of(cache, struct root_record, cache);
3565         } else {
3566                 rec = calloc(1, sizeof(*rec));
3567                 if (!rec)
3568                         return ERR_PTR(-ENOMEM);
3569                 rec->objectid = objectid;
3570                 INIT_LIST_HEAD(&rec->backrefs);
3571                 rec->cache.start = objectid;
3572                 rec->cache.size = 1;
3573
3574                 ret = insert_cache_extent(root_cache, &rec->cache);
3575                 if (ret)
3576                         return ERR_PTR(-EEXIST);
3577         }
3578         return rec;
3579 }
3580
3581 static struct root_backref *get_root_backref(struct root_record *rec,
3582                                              u64 ref_root, u64 dir, u64 index,
3583                                              const char *name, int namelen)
3584 {
3585         struct root_backref *backref;
3586
3587         list_for_each_entry(backref, &rec->backrefs, list) {
3588                 if (backref->ref_root != ref_root || backref->dir != dir ||
3589                     backref->namelen != namelen)
3590                         continue;
3591                 if (memcmp(name, backref->name, namelen))
3592                         continue;
3593                 return backref;
3594         }
3595
3596         backref = calloc(1, sizeof(*backref) + namelen + 1);
3597         if (!backref)
3598                 return NULL;
3599         backref->ref_root = ref_root;
3600         backref->dir = dir;
3601         backref->index = index;
3602         backref->namelen = namelen;
3603         memcpy(backref->name, name, namelen);
3604         backref->name[namelen] = '\0';
3605         list_add_tail(&backref->list, &rec->backrefs);
3606         return backref;
3607 }
3608
3609 static void free_root_record(struct cache_extent *cache)
3610 {
3611         struct root_record *rec;
3612         struct root_backref *backref;
3613
3614         rec = container_of(cache, struct root_record, cache);
3615         while (!list_empty(&rec->backrefs)) {
3616                 backref = to_root_backref(rec->backrefs.next);
3617                 list_del(&backref->list);
3618                 free(backref);
3619         }
3620
3621         free(rec);
3622 }
3623
3624 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3625
3626 static int add_root_backref(struct cache_tree *root_cache,
3627                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3628                             const char *name, int namelen,
3629                             int item_type, int errors)
3630 {
3631         struct root_record *rec;
3632         struct root_backref *backref;
3633
3634         rec = get_root_rec(root_cache, root_id);
3635         BUG_ON(IS_ERR(rec));
3636         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3637         BUG_ON(!backref);
3638
3639         backref->errors |= errors;
3640
3641         if (item_type != BTRFS_DIR_ITEM_KEY) {
3642                 if (backref->found_dir_index || backref->found_back_ref ||
3643                     backref->found_forward_ref) {
3644                         if (backref->index != index)
3645                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3646                 } else {
3647                         backref->index = index;
3648                 }
3649         }
3650
3651         if (item_type == BTRFS_DIR_ITEM_KEY) {
3652                 if (backref->found_forward_ref)
3653                         rec->found_ref++;
3654                 backref->found_dir_item = 1;
3655         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3656                 backref->found_dir_index = 1;
3657         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3658                 if (backref->found_forward_ref)
3659                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3660                 else if (backref->found_dir_item)
3661                         rec->found_ref++;
3662                 backref->found_forward_ref = 1;
3663         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3664                 if (backref->found_back_ref)
3665                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3666                 backref->found_back_ref = 1;
3667         } else {
3668                 BUG_ON(1);
3669         }
3670
3671         if (backref->found_forward_ref && backref->found_dir_item)
3672                 backref->reachable = 1;
3673         return 0;
3674 }
3675
3676 static int merge_root_recs(struct btrfs_root *root,
3677                            struct cache_tree *src_cache,
3678                            struct cache_tree *dst_cache)
3679 {
3680         struct cache_extent *cache;
3681         struct ptr_node *node;
3682         struct inode_record *rec;
3683         struct inode_backref *backref;
3684         int ret = 0;
3685
3686         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3687                 free_inode_recs_tree(src_cache);
3688                 return 0;
3689         }
3690
3691         while (1) {
3692                 cache = search_cache_extent(src_cache, 0);
3693                 if (!cache)
3694                         break;
3695                 node = container_of(cache, struct ptr_node, cache);
3696                 rec = node->data;
3697                 remove_cache_extent(src_cache, &node->cache);
3698                 free(node);
3699
3700                 ret = is_child_root(root, root->objectid, rec->ino);
3701                 if (ret < 0)
3702                         break;
3703                 else if (ret == 0)
3704                         goto skip;
3705
3706                 list_for_each_entry(backref, &rec->backrefs, list) {
3707                         BUG_ON(backref->found_inode_ref);
3708                         if (backref->found_dir_item)
3709                                 add_root_backref(dst_cache, rec->ino,
3710                                         root->root_key.objectid, backref->dir,
3711                                         backref->index, backref->name,
3712                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3713                                         backref->errors);
3714                         if (backref->found_dir_index)
3715                                 add_root_backref(dst_cache, rec->ino,
3716                                         root->root_key.objectid, backref->dir,
3717                                         backref->index, backref->name,
3718                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3719                                         backref->errors);
3720                 }
3721 skip:
3722                 free_inode_rec(rec);
3723         }
3724         if (ret < 0)
3725                 return ret;
3726         return 0;
3727 }
3728
3729 static int check_root_refs(struct btrfs_root *root,
3730                            struct cache_tree *root_cache)
3731 {
3732         struct root_record *rec;
3733         struct root_record *ref_root;
3734         struct root_backref *backref;
3735         struct cache_extent *cache;
3736         int loop = 1;
3737         int ret;
3738         int error;
3739         int errors = 0;
3740
3741         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3742         BUG_ON(IS_ERR(rec));
3743         rec->found_ref = 1;
3744
3745         /* fixme: this can not detect circular references */
3746         while (loop) {
3747                 loop = 0;
3748                 cache = search_cache_extent(root_cache, 0);
3749                 while (1) {
3750                         if (!cache)
3751                                 break;
3752                         rec = container_of(cache, struct root_record, cache);
3753                         cache = next_cache_extent(cache);
3754
3755                         if (rec->found_ref == 0)
3756                                 continue;
3757
3758                         list_for_each_entry(backref, &rec->backrefs, list) {
3759                                 if (!backref->reachable)
3760                                         continue;
3761
3762                                 ref_root = get_root_rec(root_cache,
3763                                                         backref->ref_root);
3764                                 BUG_ON(IS_ERR(ref_root));
3765                                 if (ref_root->found_ref > 0)
3766                                         continue;
3767
3768                                 backref->reachable = 0;
3769                                 rec->found_ref--;
3770                                 if (rec->found_ref == 0)
3771                                         loop = 1;
3772                         }
3773                 }
3774         }
3775
3776         cache = search_cache_extent(root_cache, 0);
3777         while (1) {
3778                 if (!cache)
3779                         break;
3780                 rec = container_of(cache, struct root_record, cache);
3781                 cache = next_cache_extent(cache);
3782
3783                 if (rec->found_ref == 0 &&
3784                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3785                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3786                         ret = check_orphan_item(root->fs_info->tree_root,
3787                                                 rec->objectid);
3788                         if (ret == 0)
3789                                 continue;
3790
3791                         /*
3792                          * If we don't have a root item then we likely just have
3793                          * a dir item in a snapshot for this root but no actual
3794                          * ref key or anything so it's meaningless.
3795                          */
3796                         if (!rec->found_root_item)
3797                                 continue;
3798                         errors++;
3799                         fprintf(stderr, "fs tree %llu not referenced\n",
3800                                 (unsigned long long)rec->objectid);
3801                 }
3802
3803                 error = 0;
3804                 if (rec->found_ref > 0 && !rec->found_root_item)
3805                         error = 1;
3806                 list_for_each_entry(backref, &rec->backrefs, list) {
3807                         if (!backref->found_dir_item)
3808                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3809                         if (!backref->found_dir_index)
3810                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3811                         if (!backref->found_back_ref)
3812                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3813                         if (!backref->found_forward_ref)
3814                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3815                         if (backref->reachable && backref->errors)
3816                                 error = 1;
3817                 }
3818                 if (!error)
3819                         continue;
3820
3821                 errors++;
3822                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3823                         (unsigned long long)rec->objectid, rec->found_ref,
3824                          rec->found_root_item ? "" : "not found");
3825
3826                 list_for_each_entry(backref, &rec->backrefs, list) {
3827                         if (!backref->reachable)
3828                                 continue;
3829                         if (!backref->errors && rec->found_root_item)
3830                                 continue;
3831                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3832                                 " index %llu namelen %u name %s errors %x\n",
3833                                 (unsigned long long)backref->ref_root,
3834                                 (unsigned long long)backref->dir,
3835                                 (unsigned long long)backref->index,
3836                                 backref->namelen, backref->name,
3837                                 backref->errors);
3838                         print_ref_error(backref->errors);
3839                 }
3840         }
3841         return errors > 0 ? 1 : 0;
3842 }
3843
3844 static int process_root_ref(struct extent_buffer *eb, int slot,
3845                             struct btrfs_key *key,
3846                             struct cache_tree *root_cache)
3847 {
3848         u64 dirid;
3849         u64 index;
3850         u32 len;
3851         u32 name_len;
3852         struct btrfs_root_ref *ref;
3853         char namebuf[BTRFS_NAME_LEN];
3854         int error;
3855
3856         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3857
3858         dirid = btrfs_root_ref_dirid(eb, ref);
3859         index = btrfs_root_ref_sequence(eb, ref);
3860         name_len = btrfs_root_ref_name_len(eb, ref);
3861
3862         if (name_len <= BTRFS_NAME_LEN) {
3863                 len = name_len;
3864                 error = 0;
3865         } else {
3866                 len = BTRFS_NAME_LEN;
3867                 error = REF_ERR_NAME_TOO_LONG;
3868         }
3869         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3870
3871         if (key->type == BTRFS_ROOT_REF_KEY) {
3872                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3873                                  index, namebuf, len, key->type, error);
3874         } else {
3875                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3876                                  index, namebuf, len, key->type, error);
3877         }
3878         return 0;
3879 }
3880
3881 static void free_corrupt_block(struct cache_extent *cache)
3882 {
3883         struct btrfs_corrupt_block *corrupt;
3884
3885         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3886         free(corrupt);
3887 }
3888
3889 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3890
3891 /*
3892  * Repair the btree of the given root.
3893  *
3894  * The fix is to remove the node key in corrupt_blocks cache_tree.
3895  * and rebalance the tree.
3896  * After the fix, the btree should be writeable.
3897  */
3898 static int repair_btree(struct btrfs_root *root,
3899                         struct cache_tree *corrupt_blocks)
3900 {
3901         struct btrfs_trans_handle *trans;
3902         struct btrfs_path path;
3903         struct btrfs_corrupt_block *corrupt;
3904         struct cache_extent *cache;
3905         struct btrfs_key key;
3906         u64 offset;
3907         int level;
3908         int ret = 0;
3909
3910         if (cache_tree_empty(corrupt_blocks))
3911                 return 0;
3912
3913         trans = btrfs_start_transaction(root, 1);
3914         if (IS_ERR(trans)) {
3915                 ret = PTR_ERR(trans);
3916                 fprintf(stderr, "Error starting transaction: %s\n",
3917                         strerror(-ret));
3918                 return ret;
3919         }
3920         btrfs_init_path(&path);
3921         cache = first_cache_extent(corrupt_blocks);
3922         while (cache) {
3923                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3924                                        cache);
3925                 level = corrupt->level;
3926                 path.lowest_level = level;
3927                 key.objectid = corrupt->key.objectid;
3928                 key.type = corrupt->key.type;
3929                 key.offset = corrupt->key.offset;
3930
3931                 /*
3932                  * Here we don't want to do any tree balance, since it may
3933                  * cause a balance with corrupted brother leaf/node,
3934                  * so ins_len set to 0 here.
3935                  * Balance will be done after all corrupt node/leaf is deleted.
3936                  */
3937                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3938                 if (ret < 0)
3939                         goto out;
3940                 offset = btrfs_node_blockptr(path.nodes[level],
3941                                              path.slots[level]);
3942
3943                 /* Remove the ptr */
3944                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3945                 if (ret < 0)
3946                         goto out;
3947                 /*
3948                  * Remove the corresponding extent
3949                  * return value is not concerned.
3950                  */
3951                 btrfs_release_path(&path);
3952                 ret = btrfs_free_extent(trans, root, offset,
3953                                 root->fs_info->nodesize, 0,
3954                                 root->root_key.objectid, level - 1, 0);
3955                 cache = next_cache_extent(cache);
3956         }
3957
3958         /* Balance the btree using btrfs_search_slot() */
3959         cache = first_cache_extent(corrupt_blocks);
3960         while (cache) {
3961                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3962                                        cache);
3963                 memcpy(&key, &corrupt->key, sizeof(key));
3964                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3965                 if (ret < 0)
3966                         goto out;
3967                 /* return will always >0 since it won't find the item */
3968                 ret = 0;
3969                 btrfs_release_path(&path);
3970                 cache = next_cache_extent(cache);
3971         }
3972 out:
3973         btrfs_commit_transaction(trans, root);
3974         btrfs_release_path(&path);
3975         return ret;
3976 }
3977
3978 static int check_fs_root(struct btrfs_root *root,
3979                          struct cache_tree *root_cache,
3980                          struct walk_control *wc)
3981 {
3982         int ret = 0;
3983         int err = 0;
3984         int wret;
3985         int level;
3986         struct btrfs_path path;
3987         struct shared_node root_node;
3988         struct root_record *rec;
3989         struct btrfs_root_item *root_item = &root->root_item;
3990         struct cache_tree corrupt_blocks;
3991         struct orphan_data_extent *orphan;
3992         struct orphan_data_extent *tmp;
3993         enum btrfs_tree_block_status status;
3994         struct node_refs nrefs;
3995
3996         /*
3997          * Reuse the corrupt_block cache tree to record corrupted tree block
3998          *
3999          * Unlike the usage in extent tree check, here we do it in a per
4000          * fs/subvol tree base.
4001          */
4002         cache_tree_init(&corrupt_blocks);
4003         root->fs_info->corrupt_blocks = &corrupt_blocks;
4004
4005         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4006                 rec = get_root_rec(root_cache, root->root_key.objectid);
4007                 BUG_ON(IS_ERR(rec));
4008                 if (btrfs_root_refs(root_item) > 0)
4009                         rec->found_root_item = 1;
4010         }
4011
4012         btrfs_init_path(&path);
4013         memset(&root_node, 0, sizeof(root_node));
4014         cache_tree_init(&root_node.root_cache);
4015         cache_tree_init(&root_node.inode_cache);
4016         memset(&nrefs, 0, sizeof(nrefs));
4017
4018         /* Move the orphan extent record to corresponding inode_record */
4019         list_for_each_entry_safe(orphan, tmp,
4020                                  &root->orphan_data_extents, list) {
4021                 struct inode_record *inode;
4022
4023                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4024                                       1);
4025                 BUG_ON(IS_ERR(inode));
4026                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4027                 list_move(&orphan->list, &inode->orphan_extents);
4028         }
4029
4030         level = btrfs_header_level(root->node);
4031         memset(wc->nodes, 0, sizeof(wc->nodes));
4032         wc->nodes[level] = &root_node;
4033         wc->active_node = level;
4034         wc->root_level = level;
4035
4036         /* We may not have checked the root block, lets do that now */
4037         if (btrfs_is_leaf(root->node))
4038                 status = btrfs_check_leaf(root, NULL, root->node);
4039         else
4040                 status = btrfs_check_node(root, NULL, root->node);
4041         if (status != BTRFS_TREE_BLOCK_CLEAN)
4042                 return -EIO;
4043
4044         if (btrfs_root_refs(root_item) > 0 ||
4045             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4046                 path.nodes[level] = root->node;
4047                 extent_buffer_get(root->node);
4048                 path.slots[level] = 0;
4049         } else {
4050                 struct btrfs_key key;
4051                 struct btrfs_disk_key found_key;
4052
4053                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4054                 level = root_item->drop_level;
4055                 path.lowest_level = level;
4056                 if (level > btrfs_header_level(root->node) ||
4057                     level >= BTRFS_MAX_LEVEL) {
4058                         error("ignoring invalid drop level: %u", level);
4059                         goto skip_walking;
4060                 }
4061                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4062                 if (wret < 0)
4063                         goto skip_walking;
4064                 btrfs_node_key(path.nodes[level], &found_key,
4065                                 path.slots[level]);
4066                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4067                                         sizeof(found_key)));
4068         }
4069
4070         while (1) {
4071                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4072                 if (wret < 0)
4073                         ret = wret;
4074                 if (wret != 0)
4075                         break;
4076
4077                 wret = walk_up_tree(root, &path, wc, &level);
4078                 if (wret < 0)
4079                         ret = wret;
4080                 if (wret != 0)
4081                         break;
4082         }
4083 skip_walking:
4084         btrfs_release_path(&path);
4085
4086         if (!cache_tree_empty(&corrupt_blocks)) {
4087                 struct cache_extent *cache;
4088                 struct btrfs_corrupt_block *corrupt;
4089
4090                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4091                        root->root_key.objectid);
4092                 cache = first_cache_extent(&corrupt_blocks);
4093                 while (cache) {
4094                         corrupt = container_of(cache,
4095                                                struct btrfs_corrupt_block,
4096                                                cache);
4097                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4098                                cache->start, corrupt->level,
4099                                corrupt->key.objectid, corrupt->key.type,
4100                                corrupt->key.offset);
4101                         cache = next_cache_extent(cache);
4102                 }
4103                 if (repair) {
4104                         printf("Try to repair the btree for root %llu\n",
4105                                root->root_key.objectid);
4106                         ret = repair_btree(root, &corrupt_blocks);
4107                         if (ret < 0)
4108                                 fprintf(stderr, "Failed to repair btree: %s\n",
4109                                         strerror(-ret));
4110                         if (!ret)
4111                                 printf("Btree for root %llu is fixed\n",
4112                                        root->root_key.objectid);
4113                 }
4114         }
4115
4116         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4117         if (err < 0)
4118                 ret = err;
4119
4120         if (root_node.current) {
4121                 root_node.current->checked = 1;
4122                 maybe_free_inode_rec(&root_node.inode_cache,
4123                                 root_node.current);
4124         }
4125
4126         err = check_inode_recs(root, &root_node.inode_cache);
4127         if (!ret)
4128                 ret = err;
4129
4130         free_corrupt_blocks_tree(&corrupt_blocks);
4131         root->fs_info->corrupt_blocks = NULL;
4132         free_orphan_data_extents(&root->orphan_data_extents);
4133         return ret;
4134 }
4135
4136 static int fs_root_objectid(u64 objectid)
4137 {
4138         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4139             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4140                 return 1;
4141         return is_fstree(objectid);
4142 }
4143
4144 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4145                           struct cache_tree *root_cache)
4146 {
4147         struct btrfs_path path;
4148         struct btrfs_key key;
4149         struct walk_control wc;
4150         struct extent_buffer *leaf, *tree_node;
4151         struct btrfs_root *tmp_root;
4152         struct btrfs_root *tree_root = fs_info->tree_root;
4153         int ret;
4154         int err = 0;
4155
4156         if (ctx.progress_enabled) {
4157                 ctx.tp = TASK_FS_ROOTS;
4158                 task_start(ctx.info);
4159         }
4160
4161         /*
4162          * Just in case we made any changes to the extent tree that weren't
4163          * reflected into the free space cache yet.
4164          */
4165         if (repair)
4166                 reset_cached_block_groups(fs_info);
4167         memset(&wc, 0, sizeof(wc));
4168         cache_tree_init(&wc.shared);
4169         btrfs_init_path(&path);
4170
4171 again:
4172         key.offset = 0;
4173         key.objectid = 0;
4174         key.type = BTRFS_ROOT_ITEM_KEY;
4175         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4176         if (ret < 0) {
4177                 err = 1;
4178                 goto out;
4179         }
4180         tree_node = tree_root->node;
4181         while (1) {
4182                 if (tree_node != tree_root->node) {
4183                         free_root_recs_tree(root_cache);
4184                         btrfs_release_path(&path);
4185                         goto again;
4186                 }
4187                 leaf = path.nodes[0];
4188                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4189                         ret = btrfs_next_leaf(tree_root, &path);
4190                         if (ret) {
4191                                 if (ret < 0)
4192                                         err = 1;
4193                                 break;
4194                         }
4195                         leaf = path.nodes[0];
4196                 }
4197                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4198                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4199                     fs_root_objectid(key.objectid)) {
4200                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4201                                 tmp_root = btrfs_read_fs_root_no_cache(
4202                                                 fs_info, &key);
4203                         } else {
4204                                 key.offset = (u64)-1;
4205                                 tmp_root = btrfs_read_fs_root(
4206                                                 fs_info, &key);
4207                         }
4208                         if (IS_ERR(tmp_root)) {
4209                                 err = 1;
4210                                 goto next;
4211                         }
4212                         ret = check_fs_root(tmp_root, root_cache, &wc);
4213                         if (ret == -EAGAIN) {
4214                                 free_root_recs_tree(root_cache);
4215                                 btrfs_release_path(&path);
4216                                 goto again;
4217                         }
4218                         if (ret)
4219                                 err = 1;
4220                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4221                                 btrfs_free_fs_root(tmp_root);
4222                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4223                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4224                         process_root_ref(leaf, path.slots[0], &key,
4225                                          root_cache);
4226                 }
4227 next:
4228                 path.slots[0]++;
4229         }
4230 out:
4231         btrfs_release_path(&path);
4232         if (err)
4233                 free_extent_cache_tree(&wc.shared);
4234         if (!cache_tree_empty(&wc.shared))
4235                 fprintf(stderr, "warning line %d\n", __LINE__);
4236
4237         task_stop(ctx.info);
4238
4239         return err;
4240 }
4241
4242 /*
4243  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4244  * INODE_REF/INODE_EXTREF match.
4245  *
4246  * @root:       the root of the fs/file tree
4247  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4248  * @key:        the key of the DIR_ITEM/DIR_INDEX
4249  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4250  *              distinguish root_dir between normal dir/file
4251  * @name:       the name in the INODE_REF/INODE_EXTREF
4252  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4253  * @mode:       the st_mode of INODE_ITEM
4254  *
4255  * Return 0 if no error occurred.
4256  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4257  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4258  * dir/file.
4259  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4260  * not match for normal dir/file.
4261  */
4262 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4263                          struct btrfs_key *key, u64 index, char *name,
4264                          u32 namelen, u32 mode)
4265 {
4266         struct btrfs_path path;
4267         struct extent_buffer *node;
4268         struct btrfs_dir_item *di;
4269         struct btrfs_key location;
4270         char namebuf[BTRFS_NAME_LEN] = {0};
4271         u32 total;
4272         u32 cur = 0;
4273         u32 len;
4274         u32 name_len;
4275         u32 data_len;
4276         u8 filetype;
4277         int slot;
4278         int ret;
4279
4280         btrfs_init_path(&path);
4281         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4282         if (ret < 0) {
4283                 ret = DIR_ITEM_MISSING;
4284                 goto out;
4285         }
4286
4287         /* Process root dir and goto out*/
4288         if (index == 0) {
4289                 if (ret == 0) {
4290                         ret = ROOT_DIR_ERROR;
4291                         error(
4292                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4293                                 root->objectid,
4294                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4295                                         "REF" : "EXTREF",
4296                                 ref_key->objectid, ref_key->offset,
4297                                 key->type == BTRFS_DIR_ITEM_KEY ?
4298                                         "DIR_ITEM" : "DIR_INDEX");
4299                 } else {
4300                         ret = 0;
4301                 }
4302
4303                 goto out;
4304         }
4305
4306         /* Process normal file/dir */
4307         if (ret > 0) {
4308                 ret = DIR_ITEM_MISSING;
4309                 error(
4310                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4311                         root->objectid,
4312                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4313                         ref_key->objectid, ref_key->offset,
4314                         key->type == BTRFS_DIR_ITEM_KEY ?
4315                                 "DIR_ITEM" : "DIR_INDEX",
4316                         key->objectid, key->offset, namelen, name,
4317                         imode_to_type(mode));
4318                 goto out;
4319         }
4320
4321         /* Check whether inode_id/filetype/name match */
4322         node = path.nodes[0];
4323         slot = path.slots[0];
4324         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4325         total = btrfs_item_size_nr(node, slot);
4326         while (cur < total) {
4327                 ret = DIR_ITEM_MISMATCH;
4328                 name_len = btrfs_dir_name_len(node, di);
4329                 data_len = btrfs_dir_data_len(node, di);
4330
4331                 btrfs_dir_item_key_to_cpu(node, di, &location);
4332                 if (location.objectid != ref_key->objectid ||
4333                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4334                     location.offset != 0)
4335                         goto next;
4336
4337                 filetype = btrfs_dir_type(node, di);
4338                 if (imode_to_type(mode) != filetype)
4339                         goto next;
4340
4341                 if (cur + sizeof(*di) + name_len > total ||
4342                     name_len > BTRFS_NAME_LEN) {
4343                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4344                                 root->objectid,
4345                                 key->type == BTRFS_DIR_ITEM_KEY ?
4346                                 "DIR_ITEM" : "DIR_INDEX",
4347                                 key->objectid, key->offset, name_len);
4348
4349                         if (cur + sizeof(*di) > total)
4350                                 break;
4351                         len = min_t(u32, total - cur - sizeof(*di),
4352                                     BTRFS_NAME_LEN);
4353                 } else {
4354                         len = name_len;
4355                 }
4356
4357                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4358                 if (len != namelen || strncmp(namebuf, name, len))
4359                         goto next;
4360
4361                 ret = 0;
4362                 goto out;
4363 next:
4364                 len = sizeof(*di) + name_len + data_len;
4365                 di = (struct btrfs_dir_item *)((char *)di + len);
4366                 cur += len;
4367         }
4368         if (ret == DIR_ITEM_MISMATCH)
4369                 error(
4370                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4371                         root->objectid,
4372                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4373                         ref_key->objectid, ref_key->offset,
4374                         key->type == BTRFS_DIR_ITEM_KEY ?
4375                                 "DIR_ITEM" : "DIR_INDEX",
4376                         key->objectid, key->offset, namelen, name,
4377                         imode_to_type(mode));
4378 out:
4379         btrfs_release_path(&path);
4380         return ret;
4381 }
4382
4383 /*
4384  * Traverse the given INODE_REF and call find_dir_item() to find related
4385  * DIR_ITEM/DIR_INDEX.
4386  *
4387  * @root:       the root of the fs/file tree
4388  * @ref_key:    the key of the INODE_REF
4389  * @refs:       the count of INODE_REF
4390  * @mode:       the st_mode of INODE_ITEM
4391  *
4392  * Return 0 if no error occurred.
4393  */
4394 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4395                            struct extent_buffer *node, int slot, u64 *refs,
4396                            int mode)
4397 {
4398         struct btrfs_key key;
4399         struct btrfs_inode_ref *ref;
4400         char namebuf[BTRFS_NAME_LEN] = {0};
4401         u32 total;
4402         u32 cur = 0;
4403         u32 len;
4404         u32 name_len;
4405         u64 index;
4406         int ret, err = 0;
4407
4408         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4409         total = btrfs_item_size_nr(node, slot);
4410
4411 next:
4412         /* Update inode ref count */
4413         (*refs)++;
4414
4415         index = btrfs_inode_ref_index(node, ref);
4416         name_len = btrfs_inode_ref_name_len(node, ref);
4417         if (cur + sizeof(*ref) + name_len > total ||
4418             name_len > BTRFS_NAME_LEN) {
4419                 warning("root %llu INODE_REF[%llu %llu] name too long",
4420                         root->objectid, ref_key->objectid, ref_key->offset);
4421
4422                 if (total < cur + sizeof(*ref))
4423                         goto out;
4424                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4425         } else {
4426                 len = name_len;
4427         }
4428
4429         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4430
4431         /* Check root dir ref name */
4432         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4433                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4434                       root->objectid, ref_key->objectid, ref_key->offset,
4435                       namebuf);
4436                 err |= ROOT_DIR_ERROR;
4437         }
4438
4439         /* Find related DIR_INDEX */
4440         key.objectid = ref_key->offset;
4441         key.type = BTRFS_DIR_INDEX_KEY;
4442         key.offset = index;
4443         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4444         err |= ret;
4445
4446         /* Find related dir_item */
4447         key.objectid = ref_key->offset;
4448         key.type = BTRFS_DIR_ITEM_KEY;
4449         key.offset = btrfs_name_hash(namebuf, len);
4450         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4451         err |= ret;
4452
4453         len = sizeof(*ref) + name_len;
4454         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4455         cur += len;
4456         if (cur < total)
4457                 goto next;
4458
4459 out:
4460         return err;
4461 }
4462
4463 /*
4464  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4465  * DIR_ITEM/DIR_INDEX.
4466  *
4467  * @root:       the root of the fs/file tree
4468  * @ref_key:    the key of the INODE_EXTREF
4469  * @refs:       the count of INODE_EXTREF
4470  * @mode:       the st_mode of INODE_ITEM
4471  *
4472  * Return 0 if no error occurred.
4473  */
4474 static int check_inode_extref(struct btrfs_root *root,
4475                               struct btrfs_key *ref_key,
4476                               struct extent_buffer *node, int slot, u64 *refs,
4477                               int mode)
4478 {
4479         struct btrfs_key key;
4480         struct btrfs_inode_extref *extref;
4481         char namebuf[BTRFS_NAME_LEN] = {0};
4482         u32 total;
4483         u32 cur = 0;
4484         u32 len;
4485         u32 name_len;
4486         u64 index;
4487         u64 parent;
4488         int ret;
4489         int err = 0;
4490
4491         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4492         total = btrfs_item_size_nr(node, slot);
4493
4494 next:
4495         /* update inode ref count */
4496         (*refs)++;
4497         name_len = btrfs_inode_extref_name_len(node, extref);
4498         index = btrfs_inode_extref_index(node, extref);
4499         parent = btrfs_inode_extref_parent(node, extref);
4500         if (name_len <= BTRFS_NAME_LEN) {
4501                 len = name_len;
4502         } else {
4503                 len = BTRFS_NAME_LEN;
4504                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4505                         root->objectid, ref_key->objectid, ref_key->offset);
4506         }
4507         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4508
4509         /* Check root dir ref name */
4510         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4511                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4512                       root->objectid, ref_key->objectid, ref_key->offset,
4513                       namebuf);
4514                 err |= ROOT_DIR_ERROR;
4515         }
4516
4517         /* find related dir_index */
4518         key.objectid = parent;
4519         key.type = BTRFS_DIR_INDEX_KEY;
4520         key.offset = index;
4521         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4522         err |= ret;
4523
4524         /* find related dir_item */
4525         key.objectid = parent;
4526         key.type = BTRFS_DIR_ITEM_KEY;
4527         key.offset = btrfs_name_hash(namebuf, len);
4528         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4529         err |= ret;
4530
4531         len = sizeof(*extref) + name_len;
4532         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4533         cur += len;
4534
4535         if (cur < total)
4536                 goto next;
4537
4538         return err;
4539 }
4540
4541 /*
4542  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4543  * DIR_ITEM/DIR_INDEX match.
4544  * Return with @index_ret.
4545  *
4546  * @root:       the root of the fs/file tree
4547  * @key:        the key of the INODE_REF/INODE_EXTREF
4548  * @name:       the name in the INODE_REF/INODE_EXTREF
4549  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4550  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4551  *              value (u64)-1 means do not check index
4552  * @ext_ref:    the EXTENDED_IREF feature
4553  *
4554  * Return 0 if no error occurred.
4555  * Return >0 for error bitmap
4556  */
4557 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4558                           char *name, int namelen, u64 *index_ret,
4559                           unsigned int ext_ref)
4560 {
4561         struct btrfs_path path;
4562         struct btrfs_inode_ref *ref;
4563         struct btrfs_inode_extref *extref;
4564         struct extent_buffer *node;
4565         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4566         u32 total;
4567         u32 cur = 0;
4568         u32 len;
4569         u32 ref_namelen;
4570         u64 ref_index;
4571         u64 parent;
4572         u64 dir_id;
4573         int slot;
4574         int ret;
4575
4576         ASSERT(index_ret);
4577
4578         btrfs_init_path(&path);
4579         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4580         if (ret) {
4581                 ret = INODE_REF_MISSING;
4582                 goto extref;
4583         }
4584
4585         node = path.nodes[0];
4586         slot = path.slots[0];
4587
4588         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4589         total = btrfs_item_size_nr(node, slot);
4590
4591         /* Iterate all entry of INODE_REF */
4592         while (cur < total) {
4593                 ret = INODE_REF_MISSING;
4594
4595                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4596                 ref_index = btrfs_inode_ref_index(node, ref);
4597                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4598                         goto next_ref;
4599
4600                 if (cur + sizeof(*ref) + ref_namelen > total ||
4601                     ref_namelen > BTRFS_NAME_LEN) {
4602                         warning("root %llu INODE %s[%llu %llu] name too long",
4603                                 root->objectid,
4604                                 key->type == BTRFS_INODE_REF_KEY ?
4605                                         "REF" : "EXTREF",
4606                                 key->objectid, key->offset);
4607
4608                         if (cur + sizeof(*ref) > total)
4609                                 break;
4610                         len = min_t(u32, total - cur - sizeof(*ref),
4611                                     BTRFS_NAME_LEN);
4612                 } else {
4613                         len = ref_namelen;
4614                 }
4615
4616                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4617                                    len);
4618
4619                 if (len != namelen || strncmp(ref_namebuf, name, len))
4620                         goto next_ref;
4621
4622                 *index_ret = ref_index;
4623                 ret = 0;
4624                 goto out;
4625 next_ref:
4626                 len = sizeof(*ref) + ref_namelen;
4627                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4628                 cur += len;
4629         }
4630
4631 extref:
4632         /* Skip if not support EXTENDED_IREF feature */
4633         if (!ext_ref)
4634                 goto out;
4635
4636         btrfs_release_path(&path);
4637         btrfs_init_path(&path);
4638
4639         dir_id = key->offset;
4640         key->type = BTRFS_INODE_EXTREF_KEY;
4641         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4642
4643         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4644         if (ret) {
4645                 ret = INODE_REF_MISSING;
4646                 goto out;
4647         }
4648
4649         node = path.nodes[0];
4650         slot = path.slots[0];
4651
4652         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4653         cur = 0;
4654         total = btrfs_item_size_nr(node, slot);
4655
4656         /* Iterate all entry of INODE_EXTREF */
4657         while (cur < total) {
4658                 ret = INODE_REF_MISSING;
4659
4660                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4661                 ref_index = btrfs_inode_extref_index(node, extref);
4662                 parent = btrfs_inode_extref_parent(node, extref);
4663                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4664                         goto next_extref;
4665
4666                 if (parent != dir_id)
4667                         goto next_extref;
4668
4669                 if (ref_namelen <= BTRFS_NAME_LEN) {
4670                         len = ref_namelen;
4671                 } else {
4672                         len = BTRFS_NAME_LEN;
4673                         warning("root %llu INODE %s[%llu %llu] name too long",
4674                                 root->objectid,
4675                                 key->type == BTRFS_INODE_REF_KEY ?
4676                                         "REF" : "EXTREF",
4677                                 key->objectid, key->offset);
4678                 }
4679                 read_extent_buffer(node, ref_namebuf,
4680                                    (unsigned long)(extref + 1), len);
4681
4682                 if (len != namelen || strncmp(ref_namebuf, name, len))
4683                         goto next_extref;
4684
4685                 *index_ret = ref_index;
4686                 ret = 0;
4687                 goto out;
4688
4689 next_extref:
4690                 len = sizeof(*extref) + ref_namelen;
4691                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4692                 cur += len;
4693
4694         }
4695 out:
4696         btrfs_release_path(&path);
4697         return ret;
4698 }
4699
4700 /*
4701  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4702  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4703  *
4704  * @root:       the root of the fs/file tree
4705  * @key:        the key of the INODE_REF/INODE_EXTREF
4706  * @size:       the st_size of the INODE_ITEM
4707  * @ext_ref:    the EXTENDED_IREF feature
4708  *
4709  * Return 0 if no error occurred.
4710  */
4711 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4712                           struct extent_buffer *node, int slot, u64 *size,
4713                           unsigned int ext_ref)
4714 {
4715         struct btrfs_dir_item *di;
4716         struct btrfs_inode_item *ii;
4717         struct btrfs_path path;
4718         struct btrfs_key location;
4719         char namebuf[BTRFS_NAME_LEN] = {0};
4720         u32 total;
4721         u32 cur = 0;
4722         u32 len;
4723         u32 name_len;
4724         u32 data_len;
4725         u8 filetype;
4726         u32 mode;
4727         u64 index;
4728         int ret;
4729         int err = 0;
4730
4731         /*
4732          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4733          * ignore index check.
4734          */
4735         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4736
4737         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4738         total = btrfs_item_size_nr(node, slot);
4739
4740         while (cur < total) {
4741                 data_len = btrfs_dir_data_len(node, di);
4742                 if (data_len)
4743                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4744                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4745                               "DIR_ITEM" : "DIR_INDEX",
4746                               key->objectid, key->offset, data_len);
4747
4748                 name_len = btrfs_dir_name_len(node, di);
4749                 if (cur + sizeof(*di) + name_len > total ||
4750                     name_len > BTRFS_NAME_LEN) {
4751                         warning("root %llu %s[%llu %llu] name too long",
4752                                 root->objectid,
4753                                 key->type == BTRFS_DIR_ITEM_KEY ?
4754                                 "DIR_ITEM" : "DIR_INDEX",
4755                                 key->objectid, key->offset);
4756
4757                         if (cur + sizeof(*di) > total)
4758                                 break;
4759                         len = min_t(u32, total - cur - sizeof(*di),
4760                                     BTRFS_NAME_LEN);
4761                 } else {
4762                         len = name_len;
4763                 }
4764                 (*size) += name_len;
4765
4766                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4767                 filetype = btrfs_dir_type(node, di);
4768
4769                 if (key->type == BTRFS_DIR_ITEM_KEY &&
4770                     key->offset != btrfs_name_hash(namebuf, len)) {
4771                         err |= -EIO;
4772                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4773                                 root->objectid, key->objectid, key->offset,
4774                                 namebuf, len, filetype, key->offset,
4775                                 btrfs_name_hash(namebuf, len));
4776                 }
4777
4778                 btrfs_init_path(&path);
4779                 btrfs_dir_item_key_to_cpu(node, di, &location);
4780
4781                 /* Ignore related ROOT_ITEM check */
4782                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4783                         goto next;
4784
4785                 /* Check relative INODE_ITEM(existence/filetype) */
4786                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4787                 if (ret) {
4788                         err |= INODE_ITEM_MISSING;
4789                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4790                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4791                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4792                               key->offset, location.objectid, name_len,
4793                               namebuf, filetype);
4794                         goto next;
4795                 }
4796
4797                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4798                                     struct btrfs_inode_item);
4799                 mode = btrfs_inode_mode(path.nodes[0], ii);
4800
4801                 if (imode_to_type(mode) != filetype) {
4802                         err |= INODE_ITEM_MISMATCH;
4803                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4804                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4805                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4806                               key->offset, name_len, namebuf, filetype);
4807                 }
4808
4809                 /* Check relative INODE_REF/INODE_EXTREF */
4810                 location.type = BTRFS_INODE_REF_KEY;
4811                 location.offset = key->objectid;
4812                 ret = find_inode_ref(root, &location, namebuf, len,
4813                                      &index, ext_ref);
4814                 err |= ret;
4815                 if (ret & INODE_REF_MISSING)
4816                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4817                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4818                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4819                               key->offset, name_len, namebuf, filetype);
4820
4821 next:
4822                 btrfs_release_path(&path);
4823                 len = sizeof(*di) + name_len + data_len;
4824                 di = (struct btrfs_dir_item *)((char *)di + len);
4825                 cur += len;
4826
4827                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4828                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4829                               root->objectid, key->objectid, key->offset);
4830                         break;
4831                 }
4832         }
4833
4834         return err;
4835 }
4836
4837 /*
4838  * Check file extent datasum/hole, update the size of the file extents,
4839  * check and update the last offset of the file extent.
4840  *
4841  * @root:       the root of fs/file tree.
4842  * @fkey:       the key of the file extent.
4843  * @nodatasum:  INODE_NODATASUM feature.
4844  * @size:       the sum of all EXTENT_DATA items size for this inode.
4845  * @end:        the offset of the last extent.
4846  *
4847  * Return 0 if no error occurred.
4848  */
4849 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4850                              struct extent_buffer *node, int slot,
4851                              unsigned int nodatasum, u64 *size, u64 *end)
4852 {
4853         struct btrfs_file_extent_item *fi;
4854         u64 disk_bytenr;
4855         u64 disk_num_bytes;
4856         u64 extent_num_bytes;
4857         u64 extent_offset;
4858         u64 csum_found;         /* In byte size, sectorsize aligned */
4859         u64 search_start;       /* Logical range start we search for csum */
4860         u64 search_len;         /* Logical range len we search for csum */
4861         unsigned int extent_type;
4862         unsigned int is_hole;
4863         int compressed = 0;
4864         int ret;
4865         int err = 0;
4866
4867         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4868
4869         /* Check inline extent */
4870         extent_type = btrfs_file_extent_type(node, fi);
4871         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4872                 struct btrfs_item *e = btrfs_item_nr(slot);
4873                 u32 item_inline_len;
4874
4875                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4876                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4877                 compressed = btrfs_file_extent_compression(node, fi);
4878                 if (extent_num_bytes == 0) {
4879                         error(
4880                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4881                                 root->objectid, fkey->objectid, fkey->offset);
4882                         err |= FILE_EXTENT_ERROR;
4883                 }
4884                 if (!compressed && extent_num_bytes != item_inline_len) {
4885                         error(
4886                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4887                                 root->objectid, fkey->objectid, fkey->offset,
4888                                 extent_num_bytes, item_inline_len);
4889                         err |= FILE_EXTENT_ERROR;
4890                 }
4891                 *end += extent_num_bytes;
4892                 *size += extent_num_bytes;
4893                 return err;
4894         }
4895
4896         /* Check extent type */
4897         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4898                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4899                 err |= FILE_EXTENT_ERROR;
4900                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4901                       root->objectid, fkey->objectid, fkey->offset);
4902                 return err;
4903         }
4904
4905         /* Check REG_EXTENT/PREALLOC_EXTENT */
4906         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4907         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4908         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4909         extent_offset = btrfs_file_extent_offset(node, fi);
4910         compressed = btrfs_file_extent_compression(node, fi);
4911         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4912
4913         /*
4914          * Check EXTENT_DATA csum
4915          *
4916          * For plain (uncompressed) extent, we should only check the range
4917          * we're referring to, as it's possible that part of prealloc extent
4918          * has been written, and has csum:
4919          *
4920          * |<--- Original large preallocated extent A ---->|
4921          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4922          *      No csum                         Has csum
4923          *
4924          * For compressed extent, we should check the whole range.
4925          */
4926         if (!compressed) {
4927                 search_start = disk_bytenr + extent_offset;
4928                 search_len = extent_num_bytes;
4929         } else {
4930                 search_start = disk_bytenr;
4931                 search_len = disk_num_bytes;
4932         }
4933         ret = count_csum_range(root, search_start, search_len, &csum_found);
4934         if (csum_found > 0 && nodatasum) {
4935                 err |= ODD_CSUM_ITEM;
4936                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4937                       root->objectid, fkey->objectid, fkey->offset);
4938         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4939                    !is_hole && (ret < 0 || csum_found < search_len)) {
4940                 err |= CSUM_ITEM_MISSING;
4941                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4942                       root->objectid, fkey->objectid, fkey->offset,
4943                       csum_found, search_len);
4944         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4945                 err |= ODD_CSUM_ITEM;
4946                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4947                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4948         }
4949
4950         /* Check EXTENT_DATA hole */
4951         if (!no_holes && *end != fkey->offset) {
4952                 err |= FILE_EXTENT_ERROR;
4953                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4954                       root->objectid, fkey->objectid, fkey->offset);
4955         }
4956
4957         *end += extent_num_bytes;
4958         if (!is_hole)
4959                 *size += extent_num_bytes;
4960
4961         return err;
4962 }
4963
4964 /*
4965  * Set inode item nbytes to @nbytes
4966  *
4967  * Returns  0     on success
4968  * Returns  != 0  on error
4969  */
4970 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
4971                                       struct btrfs_path *path,
4972                                       u64 ino, u64 nbytes)
4973 {
4974         struct btrfs_trans_handle *trans;
4975         struct btrfs_inode_item *ii;
4976         struct btrfs_key key;
4977         struct btrfs_key research_key;
4978         int err = 0;
4979         int ret;
4980
4981         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
4982
4983         key.objectid = ino;
4984         key.type = BTRFS_INODE_ITEM_KEY;
4985         key.offset = 0;
4986
4987         trans = btrfs_start_transaction(root, 1);
4988         if (IS_ERR(trans)) {
4989                 ret = PTR_ERR(trans);
4990                 err |= ret;
4991                 goto out;
4992         }
4993
4994         btrfs_release_path(path);
4995         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4996         if (ret > 0)
4997                 ret = -ENOENT;
4998         if (ret) {
4999                 err |= ret;
5000                 goto fail;
5001         }
5002
5003         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5004                             struct btrfs_inode_item);
5005         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5006         btrfs_mark_buffer_dirty(path->nodes[0]);
5007 fail:
5008         btrfs_commit_transaction(trans, root);
5009 out:
5010         if (ret)
5011                 error("failed to set nbytes in inode %llu root %llu",
5012                       ino, root->root_key.objectid);
5013         else
5014                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5015                        root->root_key.objectid, nbytes);
5016
5017         /* research path */
5018         btrfs_release_path(path);
5019         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5020         err |= ret;
5021
5022         return err;
5023 }
5024
5025 /*
5026  * Set directory inode isize to @isize.
5027  *
5028  * Returns 0     on success.
5029  * Returns != 0  on error.
5030  */
5031 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5032                                    struct btrfs_path *path,
5033                                    u64 ino, u64 isize)
5034 {
5035         struct btrfs_trans_handle *trans;
5036         struct btrfs_inode_item *ii;
5037         struct btrfs_key key;
5038         struct btrfs_key research_key;
5039         int ret;
5040         int err = 0;
5041
5042         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5043
5044         key.objectid = ino;
5045         key.type = BTRFS_INODE_ITEM_KEY;
5046         key.offset = 0;
5047
5048         trans = btrfs_start_transaction(root, 1);
5049         if (IS_ERR(trans)) {
5050                 ret = PTR_ERR(trans);
5051                 err |= ret;
5052                 goto out;
5053         }
5054
5055         btrfs_release_path(path);
5056         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5057         if (ret > 0)
5058                 ret = -ENOENT;
5059         if (ret) {
5060                 err |= ret;
5061                 goto fail;
5062         }
5063
5064         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5065                             struct btrfs_inode_item);
5066         btrfs_set_inode_size(path->nodes[0], ii, isize);
5067         btrfs_mark_buffer_dirty(path->nodes[0]);
5068 fail:
5069         btrfs_commit_transaction(trans, root);
5070 out:
5071         if (ret)
5072                 error("failed to set isize in inode %llu root %llu",
5073                       ino, root->root_key.objectid);
5074         else
5075                 printf("Set isize in inode %llu root %llu to %llu\n",
5076                        ino, root->root_key.objectid, isize);
5077
5078         btrfs_release_path(path);
5079         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5080         err |= ret;
5081
5082         return err;
5083 }
5084
5085 /*
5086  * Wrapper function for btrfs_add_orphan_item().
5087  *
5088  * Returns 0     on success.
5089  * Returns != 0  on error.
5090  */
5091 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5092                                            struct btrfs_path *path, u64 ino)
5093 {
5094         struct btrfs_trans_handle *trans;
5095         struct btrfs_key research_key;
5096         int ret;
5097         int err = 0;
5098
5099         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5100
5101         trans = btrfs_start_transaction(root, 1);
5102         if (IS_ERR(trans)) {
5103                 ret = PTR_ERR(trans);
5104                 err |= ret;
5105                 goto out;
5106         }
5107
5108         btrfs_release_path(path);
5109         ret = btrfs_add_orphan_item(trans, root, path, ino);
5110         err |= ret;
5111         btrfs_commit_transaction(trans, root);
5112 out:
5113         if (ret)
5114                 error("failed to add inode %llu as orphan item root %llu",
5115                       ino, root->root_key.objectid);
5116         else
5117                 printf("Added inode %llu as orphan item root %llu\n",
5118                        ino, root->root_key.objectid);
5119
5120         btrfs_release_path(path);
5121         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5122         err |= ret;
5123
5124         return err;
5125 }
5126
5127 /*
5128  * Check INODE_ITEM and related ITEMs (the same inode number)
5129  * 1. check link count
5130  * 2. check inode ref/extref
5131  * 3. check dir item/index
5132  *
5133  * @ext_ref:    the EXTENDED_IREF feature
5134  *
5135  * Return 0 if no error occurred.
5136  * Return >0 for error or hit the traversal is done(by error bitmap)
5137  */
5138 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5139                             unsigned int ext_ref)
5140 {
5141         struct extent_buffer *node;
5142         struct btrfs_inode_item *ii;
5143         struct btrfs_key key;
5144         u64 inode_id;
5145         u32 mode;
5146         u64 nlink;
5147         u64 nbytes;
5148         u64 isize;
5149         u64 size = 0;
5150         u64 refs = 0;
5151         u64 extent_end = 0;
5152         u64 extent_size = 0;
5153         unsigned int dir;
5154         unsigned int nodatasum;
5155         int slot;
5156         int ret;
5157         int err = 0;
5158
5159         node = path->nodes[0];
5160         slot = path->slots[0];
5161
5162         btrfs_item_key_to_cpu(node, &key, slot);
5163         inode_id = key.objectid;
5164
5165         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5166                 ret = btrfs_next_item(root, path);
5167                 if (ret > 0)
5168                         err |= LAST_ITEM;
5169                 return err;
5170         }
5171
5172         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5173         isize = btrfs_inode_size(node, ii);
5174         nbytes = btrfs_inode_nbytes(node, ii);
5175         mode = btrfs_inode_mode(node, ii);
5176         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5177         nlink = btrfs_inode_nlink(node, ii);
5178         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5179
5180         while (1) {
5181                 ret = btrfs_next_item(root, path);
5182                 if (ret < 0) {
5183                         /* out will fill 'err' rusing current statistics */
5184                         goto out;
5185                 } else if (ret > 0) {
5186                         err |= LAST_ITEM;
5187                         goto out;
5188                 }
5189
5190                 node = path->nodes[0];
5191                 slot = path->slots[0];
5192                 btrfs_item_key_to_cpu(node, &key, slot);
5193                 if (key.objectid != inode_id)
5194                         goto out;
5195
5196                 switch (key.type) {
5197                 case BTRFS_INODE_REF_KEY:
5198                         ret = check_inode_ref(root, &key, node, slot, &refs,
5199                                               mode);
5200                         err |= ret;
5201                         break;
5202                 case BTRFS_INODE_EXTREF_KEY:
5203                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5204                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5205                                         root->objectid, key.objectid,
5206                                         key.offset);
5207                         ret = check_inode_extref(root, &key, node, slot, &refs,
5208                                                  mode);
5209                         err |= ret;
5210                         break;
5211                 case BTRFS_DIR_ITEM_KEY:
5212                 case BTRFS_DIR_INDEX_KEY:
5213                         if (!dir) {
5214                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5215                                         root->objectid, inode_id,
5216                                         imode_to_type(mode), key.objectid,
5217                                         key.offset);
5218                         }
5219                         ret = check_dir_item(root, &key, node, slot, &size,
5220                                              ext_ref);
5221                         err |= ret;
5222                         break;
5223                 case BTRFS_EXTENT_DATA_KEY:
5224                         if (dir) {
5225                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5226                                         root->objectid, inode_id, key.objectid,
5227                                         key.offset);
5228                         }
5229                         ret = check_file_extent(root, &key, node, slot,
5230                                                 nodatasum, &extent_size,
5231                                                 &extent_end);
5232                         err |= ret;
5233                         break;
5234                 case BTRFS_XATTR_ITEM_KEY:
5235                         break;
5236                 default:
5237                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5238                               key.objectid, key.type, key.offset);
5239                 }
5240         }
5241
5242 out:
5243         /* verify INODE_ITEM nlink/isize/nbytes */
5244         if (dir) {
5245                 if (nlink != 1) {
5246                         err |= LINK_COUNT_ERROR;
5247                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5248                               root->objectid, inode_id, nlink);
5249                 }
5250
5251                 /*
5252                  * Just a warning, as dir inode nbytes is just an
5253                  * instructive value.
5254                  */
5255                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5256                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5257                                 root->objectid, inode_id,
5258                                 root->fs_info->nodesize);
5259                 }
5260
5261                 if (isize != size) {
5262                         if (repair)
5263                                 ret = repair_dir_isize_lowmem(root, path,
5264                                                               inode_id, size);
5265                         if (!repair || ret) {
5266                                 err |= ISIZE_ERROR;
5267                                 error(
5268                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5269                                       root->objectid, inode_id, isize, size);
5270                         }
5271                 }
5272         } else {
5273                 if (nlink != refs) {
5274                         err |= LINK_COUNT_ERROR;
5275                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5276                               root->objectid, inode_id, nlink, refs);
5277                 } else if (!nlink) {
5278                         if (repair)
5279                                 ret = repair_inode_orphan_item_lowmem(root,
5280                                                               path, inode_id);
5281                         if (!repair || ret) {
5282                                 err |= ORPHAN_ITEM;
5283                                 error("root %llu INODE[%llu] is orphan item",
5284                                       root->objectid, inode_id);
5285                         }
5286                 }
5287
5288                 if (!nbytes && !no_holes && extent_end < isize) {
5289                         err |= NBYTES_ERROR;
5290                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5291                               root->objectid, inode_id, isize);
5292                 }
5293
5294                 if (nbytes != extent_size) {
5295                         if (repair)
5296                                 ret = repair_inode_nbytes_lowmem(root, path,
5297                                                          inode_id, extent_size);
5298                         if (!repair || ret) {
5299                                 err |= NBYTES_ERROR;
5300                                 error(
5301         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5302                                       root->objectid, inode_id, nbytes,
5303                                       extent_size);
5304                         }
5305                 }
5306         }
5307
5308         return err;
5309 }
5310
5311 /*
5312  * check first root dir's inode_item and inode_ref
5313  *
5314  * returns 0 means no error
5315  * returns >0 means error
5316  * returns <0 means fatal error
5317  */
5318 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5319 {
5320         struct btrfs_path path;
5321         struct btrfs_key key;
5322         struct btrfs_inode_item *ii;
5323         u64 index;
5324         u32 mode;
5325         int err = 0;
5326         int ret;
5327
5328         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5329         key.type = BTRFS_INODE_ITEM_KEY;
5330         key.offset = 0;
5331
5332         /* For root being dropped, we don't need to check first inode */
5333         if (btrfs_root_refs(&root->root_item) == 0 &&
5334             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5335             BTRFS_FIRST_FREE_OBJECTID)
5336                 return 0;
5337
5338         btrfs_init_path(&path);
5339         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5340         if (ret < 0)
5341                 goto out;
5342         if (ret > 0) {
5343                 ret = 0;
5344                 err |= INODE_ITEM_MISSING;
5345         } else {
5346                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5347                                     struct btrfs_inode_item);
5348                 mode = btrfs_inode_mode(path.nodes[0], ii);
5349                 if (imode_to_type(mode) != BTRFS_FT_DIR)
5350                         err |= INODE_ITEM_MISMATCH;
5351         }
5352
5353         /* lookup first inode ref */
5354         key.offset = BTRFS_FIRST_FREE_OBJECTID;
5355         key.type = BTRFS_INODE_REF_KEY;
5356         /* special index value */
5357         index = 0;
5358
5359         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5360         if (ret < 0)
5361                 goto out;
5362         err |= ret;
5363
5364 out:
5365         btrfs_release_path(&path);
5366         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5367                 error("root dir INODE_ITEM is %s",
5368                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5369         if (err & INODE_REF_MISSING)
5370                 error("root dir INODE_REF is missing");
5371
5372         return ret < 0 ? ret : err;
5373 }
5374
5375 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5376                                                 u64 parent, u64 root)
5377 {
5378         struct rb_node *node;
5379         struct tree_backref *back = NULL;
5380         struct tree_backref match = {
5381                 .node = {
5382                         .is_data = 0,
5383                 },
5384         };
5385
5386         if (parent) {
5387                 match.parent = parent;
5388                 match.node.full_backref = 1;
5389         } else {
5390                 match.root = root;
5391         }
5392
5393         node = rb_search(&rec->backref_tree, &match.node.node,
5394                          (rb_compare_keys)compare_extent_backref, NULL);
5395         if (node)
5396                 back = to_tree_backref(rb_node_to_extent_backref(node));
5397
5398         return back;
5399 }
5400
5401 static struct data_backref *find_data_backref(struct extent_record *rec,
5402                                                 u64 parent, u64 root,
5403                                                 u64 owner, u64 offset,
5404                                                 int found_ref,
5405                                                 u64 disk_bytenr, u64 bytes)
5406 {
5407         struct rb_node *node;
5408         struct data_backref *back = NULL;
5409         struct data_backref match = {
5410                 .node = {
5411                         .is_data = 1,
5412                 },
5413                 .owner = owner,
5414                 .offset = offset,
5415                 .bytes = bytes,
5416                 .found_ref = found_ref,
5417                 .disk_bytenr = disk_bytenr,
5418         };
5419
5420         if (parent) {
5421                 match.parent = parent;
5422                 match.node.full_backref = 1;
5423         } else {
5424                 match.root = root;
5425         }
5426
5427         node = rb_search(&rec->backref_tree, &match.node.node,
5428                          (rb_compare_keys)compare_extent_backref, NULL);
5429         if (node)
5430                 back = to_data_backref(rb_node_to_extent_backref(node));
5431
5432         return back;
5433 }
5434 /*
5435  * Iterate all item on the tree and call check_inode_item() to check.
5436  *
5437  * @root:       the root of the tree to be checked.
5438  * @ext_ref:    the EXTENDED_IREF feature
5439  *
5440  * Return 0 if no error found.
5441  * Return <0 for error.
5442  */
5443 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5444 {
5445         struct btrfs_path path;
5446         struct node_refs nrefs;
5447         struct btrfs_root_item *root_item = &root->root_item;
5448         int ret;
5449         int level;
5450         int err = 0;
5451
5452         /*
5453          * We need to manually check the first inode item(256)
5454          * As the following traversal function will only start from
5455          * the first inode item in the leaf, if inode item(256) is missing
5456          * we will just skip it forever.
5457          */
5458         ret = check_fs_first_inode(root, ext_ref);
5459         if (ret < 0)
5460                 return ret;
5461         err |= !!ret;
5462
5463         memset(&nrefs, 0, sizeof(nrefs));
5464         level = btrfs_header_level(root->node);
5465         btrfs_init_path(&path);
5466
5467         if (btrfs_root_refs(root_item) > 0 ||
5468             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5469                 path.nodes[level] = root->node;
5470                 path.slots[level] = 0;
5471                 extent_buffer_get(root->node);
5472         } else {
5473                 struct btrfs_key key;
5474
5475                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5476                 level = root_item->drop_level;
5477                 path.lowest_level = level;
5478                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5479                 if (ret < 0)
5480                         goto out;
5481                 ret = 0;
5482         }
5483
5484         while (1) {
5485                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5486                 err |= !!ret;
5487
5488                 /* if ret is negative, walk shall stop */
5489                 if (ret < 0) {
5490                         ret = err;
5491                         break;
5492                 }
5493
5494                 ret = walk_up_tree_v2(root, &path, &level);
5495                 if (ret != 0) {
5496                         /* Normal exit, reset ret to err */
5497                         ret = err;
5498                         break;
5499                 }
5500         }
5501
5502 out:
5503         btrfs_release_path(&path);
5504         return ret;
5505 }
5506
5507 /*
5508  * Find the relative ref for root_ref and root_backref.
5509  *
5510  * @root:       the root of the root tree.
5511  * @ref_key:    the key of the root ref.
5512  *
5513  * Return 0 if no error occurred.
5514  */
5515 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5516                           struct extent_buffer *node, int slot)
5517 {
5518         struct btrfs_path path;
5519         struct btrfs_key key;
5520         struct btrfs_root_ref *ref;
5521         struct btrfs_root_ref *backref;
5522         char ref_name[BTRFS_NAME_LEN] = {0};
5523         char backref_name[BTRFS_NAME_LEN] = {0};
5524         u64 ref_dirid;
5525         u64 ref_seq;
5526         u32 ref_namelen;
5527         u64 backref_dirid;
5528         u64 backref_seq;
5529         u32 backref_namelen;
5530         u32 len;
5531         int ret;
5532         int err = 0;
5533
5534         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5535         ref_dirid = btrfs_root_ref_dirid(node, ref);
5536         ref_seq = btrfs_root_ref_sequence(node, ref);
5537         ref_namelen = btrfs_root_ref_name_len(node, ref);
5538
5539         if (ref_namelen <= BTRFS_NAME_LEN) {
5540                 len = ref_namelen;
5541         } else {
5542                 len = BTRFS_NAME_LEN;
5543                 warning("%s[%llu %llu] ref_name too long",
5544                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5545                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5546                         ref_key->offset);
5547         }
5548         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5549
5550         /* Find relative root_ref */
5551         key.objectid = ref_key->offset;
5552         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5553         key.offset = ref_key->objectid;
5554
5555         btrfs_init_path(&path);
5556         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5557         if (ret) {
5558                 err |= ROOT_REF_MISSING;
5559                 error("%s[%llu %llu] couldn't find relative ref",
5560                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5561                       "ROOT_REF" : "ROOT_BACKREF",
5562                       ref_key->objectid, ref_key->offset);
5563                 goto out;
5564         }
5565
5566         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5567                                  struct btrfs_root_ref);
5568         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5569         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5570         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5571
5572         if (backref_namelen <= BTRFS_NAME_LEN) {
5573                 len = backref_namelen;
5574         } else {
5575                 len = BTRFS_NAME_LEN;
5576                 warning("%s[%llu %llu] ref_name too long",
5577                         key.type == BTRFS_ROOT_REF_KEY ?
5578                         "ROOT_REF" : "ROOT_BACKREF",
5579                         key.objectid, key.offset);
5580         }
5581         read_extent_buffer(path.nodes[0], backref_name,
5582                            (unsigned long)(backref + 1), len);
5583
5584         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5585             ref_namelen != backref_namelen ||
5586             strncmp(ref_name, backref_name, len)) {
5587                 err |= ROOT_REF_MISMATCH;
5588                 error("%s[%llu %llu] mismatch relative ref",
5589                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5590                       "ROOT_REF" : "ROOT_BACKREF",
5591                       ref_key->objectid, ref_key->offset);
5592         }
5593 out:
5594         btrfs_release_path(&path);
5595         return err;
5596 }
5597
5598 /*
5599  * Check all fs/file tree in low_memory mode.
5600  *
5601  * 1. for fs tree root item, call check_fs_root_v2()
5602  * 2. for fs tree root ref/backref, call check_root_ref()
5603  *
5604  * Return 0 if no error occurred.
5605  */
5606 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5607 {
5608         struct btrfs_root *tree_root = fs_info->tree_root;
5609         struct btrfs_root *cur_root = NULL;
5610         struct btrfs_path path;
5611         struct btrfs_key key;
5612         struct extent_buffer *node;
5613         unsigned int ext_ref;
5614         int slot;
5615         int ret;
5616         int err = 0;
5617
5618         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5619
5620         btrfs_init_path(&path);
5621         key.objectid = BTRFS_FS_TREE_OBJECTID;
5622         key.offset = 0;
5623         key.type = BTRFS_ROOT_ITEM_KEY;
5624
5625         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5626         if (ret < 0) {
5627                 err = ret;
5628                 goto out;
5629         } else if (ret > 0) {
5630                 err = -ENOENT;
5631                 goto out;
5632         }
5633
5634         while (1) {
5635                 node = path.nodes[0];
5636                 slot = path.slots[0];
5637                 btrfs_item_key_to_cpu(node, &key, slot);
5638                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5639                         goto out;
5640                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5641                     fs_root_objectid(key.objectid)) {
5642                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5643                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5644                                                                        &key);
5645                         } else {
5646                                 key.offset = (u64)-1;
5647                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5648                         }
5649
5650                         if (IS_ERR(cur_root)) {
5651                                 error("Fail to read fs/subvol tree: %lld",
5652                                       key.objectid);
5653                                 err = -EIO;
5654                                 goto next;
5655                         }
5656
5657                         ret = check_fs_root_v2(cur_root, ext_ref);
5658                         err |= ret;
5659
5660                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5661                                 btrfs_free_fs_root(cur_root);
5662                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5663                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5664                         ret = check_root_ref(tree_root, &key, node, slot);
5665                         err |= ret;
5666                 }
5667 next:
5668                 ret = btrfs_next_item(tree_root, &path);
5669                 if (ret > 0)
5670                         goto out;
5671                 if (ret < 0) {
5672                         err = ret;
5673                         goto out;
5674                 }
5675         }
5676
5677 out:
5678         btrfs_release_path(&path);
5679         return err;
5680 }
5681
5682 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5683                           struct cache_tree *root_cache)
5684 {
5685         int ret;
5686
5687         if (!ctx.progress_enabled)
5688                 fprintf(stderr, "checking fs roots\n");
5689         if (check_mode == CHECK_MODE_LOWMEM)
5690                 ret = check_fs_roots_v2(fs_info);
5691         else
5692                 ret = check_fs_roots(fs_info, root_cache);
5693
5694         return ret;
5695 }
5696
5697 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5698 {
5699         struct extent_backref *back, *tmp;
5700         struct tree_backref *tback;
5701         struct data_backref *dback;
5702         u64 found = 0;
5703         int err = 0;
5704
5705         rbtree_postorder_for_each_entry_safe(back, tmp,
5706                                              &rec->backref_tree, node) {
5707                 if (!back->found_extent_tree) {
5708                         err = 1;
5709                         if (!print_errs)
5710                                 goto out;
5711                         if (back->is_data) {
5712                                 dback = to_data_backref(back);
5713                                 fprintf(stderr, "Data backref %llu %s %llu"
5714                                         " owner %llu offset %llu num_refs %lu"
5715                                         " not found in extent tree\n",
5716                                         (unsigned long long)rec->start,
5717                                         back->full_backref ?
5718                                         "parent" : "root",
5719                                         back->full_backref ?
5720                                         (unsigned long long)dback->parent:
5721                                         (unsigned long long)dback->root,
5722                                         (unsigned long long)dback->owner,
5723                                         (unsigned long long)dback->offset,
5724                                         (unsigned long)dback->num_refs);
5725                         } else {
5726                                 tback = to_tree_backref(back);
5727                                 fprintf(stderr, "Tree backref %llu parent %llu"
5728                                         " root %llu not found in extent tree\n",
5729                                         (unsigned long long)rec->start,
5730                                         (unsigned long long)tback->parent,
5731                                         (unsigned long long)tback->root);
5732                         }
5733                 }
5734                 if (!back->is_data && !back->found_ref) {
5735                         err = 1;
5736                         if (!print_errs)
5737                                 goto out;
5738                         tback = to_tree_backref(back);
5739                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5740                                 (unsigned long long)rec->start,
5741                                 back->full_backref ? "parent" : "root",
5742                                 back->full_backref ?
5743                                 (unsigned long long)tback->parent :
5744                                 (unsigned long long)tback->root, back);
5745                 }
5746                 if (back->is_data) {
5747                         dback = to_data_backref(back);
5748                         if (dback->found_ref != dback->num_refs) {
5749                                 err = 1;
5750                                 if (!print_errs)
5751                                         goto out;
5752                                 fprintf(stderr, "Incorrect local backref count"
5753                                         " on %llu %s %llu owner %llu"
5754                                         " offset %llu found %u wanted %u back %p\n",
5755                                         (unsigned long long)rec->start,
5756                                         back->full_backref ?
5757                                         "parent" : "root",
5758                                         back->full_backref ?
5759                                         (unsigned long long)dback->parent:
5760                                         (unsigned long long)dback->root,
5761                                         (unsigned long long)dback->owner,
5762                                         (unsigned long long)dback->offset,
5763                                         dback->found_ref, dback->num_refs, back);
5764                         }
5765                         if (dback->disk_bytenr != rec->start) {
5766                                 err = 1;
5767                                 if (!print_errs)
5768                                         goto out;
5769                                 fprintf(stderr, "Backref disk bytenr does not"
5770                                         " match extent record, bytenr=%llu, "
5771                                         "ref bytenr=%llu\n",
5772                                         (unsigned long long)rec->start,
5773                                         (unsigned long long)dback->disk_bytenr);
5774                         }
5775
5776                         if (dback->bytes != rec->nr) {
5777                                 err = 1;
5778                                 if (!print_errs)
5779                                         goto out;
5780                                 fprintf(stderr, "Backref bytes do not match "
5781                                         "extent backref, bytenr=%llu, ref "
5782                                         "bytes=%llu, backref bytes=%llu\n",
5783                                         (unsigned long long)rec->start,
5784                                         (unsigned long long)rec->nr,
5785                                         (unsigned long long)dback->bytes);
5786                         }
5787                 }
5788                 if (!back->is_data) {
5789                         found += 1;
5790                 } else {
5791                         dback = to_data_backref(back);
5792                         found += dback->found_ref;
5793                 }
5794         }
5795         if (found != rec->refs) {
5796                 err = 1;
5797                 if (!print_errs)
5798                         goto out;
5799                 fprintf(stderr, "Incorrect global backref count "
5800                         "on %llu found %llu wanted %llu\n",
5801                         (unsigned long long)rec->start,
5802                         (unsigned long long)found,
5803                         (unsigned long long)rec->refs);
5804         }
5805 out:
5806         return err;
5807 }
5808
/*
 * rb_free_nodes() callback: free the extent_backref that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
5815
5816 static void free_all_extent_backrefs(struct extent_record *rec)
5817 {
5818         rb_free_nodes(&rec->backref_tree, __free_one_backref);
5819 }
5820
5821 static void free_extent_record_cache(struct cache_tree *extent_cache)
5822 {
5823         struct cache_extent *cache;
5824         struct extent_record *rec;
5825
5826         while (1) {
5827                 cache = first_cache_extent(extent_cache);
5828                 if (!cache)
5829                         break;
5830                 rec = container_of(cache, struct extent_record, cache);
5831                 remove_cache_extent(extent_cache, cache);
5832                 free_all_extent_backrefs(rec);
5833                 free(rec);
5834         }
5835 }
5836
5837 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5838                                  struct extent_record *rec)
5839 {
5840         if (rec->content_checked && rec->owner_ref_checked &&
5841             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5842             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5843             !rec->bad_full_backref && !rec->crossing_stripes &&
5844             !rec->wrong_chunk_type) {
5845                 remove_cache_extent(extent_cache, &rec->cache);
5846                 free_all_extent_backrefs(rec);
5847                 list_del_init(&rec->list);
5848                 free(rec);
5849         }
5850         return 0;
5851 }
5852
5853 static int check_owner_ref(struct btrfs_root *root,
5854                             struct extent_record *rec,
5855                             struct extent_buffer *buf)
5856 {
5857         struct extent_backref *node, *tmp;
5858         struct tree_backref *back;
5859         struct btrfs_root *ref_root;
5860         struct btrfs_key key;
5861         struct btrfs_path path;
5862         struct extent_buffer *parent;
5863         int level;
5864         int found = 0;
5865         int ret;
5866
5867         rbtree_postorder_for_each_entry_safe(node, tmp,
5868                                              &rec->backref_tree, node) {
5869                 if (node->is_data)
5870                         continue;
5871                 if (!node->found_ref)
5872                         continue;
5873                 if (node->full_backref)
5874                         continue;
5875                 back = to_tree_backref(node);
5876                 if (btrfs_header_owner(buf) == back->root)
5877                         return 0;
5878         }
5879         BUG_ON(rec->is_root);
5880
5881         /* try to find the block by search corresponding fs tree */
5882         key.objectid = btrfs_header_owner(buf);
5883         key.type = BTRFS_ROOT_ITEM_KEY;
5884         key.offset = (u64)-1;
5885
5886         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5887         if (IS_ERR(ref_root))
5888                 return 1;
5889
5890         level = btrfs_header_level(buf);
5891         if (level == 0)
5892                 btrfs_item_key_to_cpu(buf, &key, 0);
5893         else
5894                 btrfs_node_key_to_cpu(buf, &key, 0);
5895
5896         btrfs_init_path(&path);
5897         path.lowest_level = level + 1;
5898         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5899         if (ret < 0)
5900                 return 0;
5901
5902         parent = path.nodes[level + 1];
5903         if (parent && buf->start == btrfs_node_blockptr(parent,
5904                                                         path.slots[level + 1]))
5905                 found = 1;
5906
5907         btrfs_release_path(&path);
5908         return found ? 0 : 1;
5909 }
5910
5911 static int is_extent_tree_record(struct extent_record *rec)
5912 {
5913         struct extent_backref *node, *tmp;
5914         struct tree_backref *back;
5915         int is_extent = 0;
5916
5917         rbtree_postorder_for_each_entry_safe(node, tmp,
5918                                              &rec->backref_tree, node) {
5919                 if (node->is_data)
5920                         return 0;
5921                 back = to_tree_backref(node);
5922                 if (node->full_backref)
5923                         return 0;
5924                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5925                         is_extent = 1;
5926         }
5927         return is_extent;
5928 }
5929
5930
5931 static int record_bad_block_io(struct btrfs_fs_info *info,
5932                                struct cache_tree *extent_cache,
5933                                u64 start, u64 len)
5934 {
5935         struct extent_record *rec;
5936         struct cache_extent *cache;
5937         struct btrfs_key key;
5938
5939         cache = lookup_cache_extent(extent_cache, start, len);
5940         if (!cache)
5941                 return 0;
5942
5943         rec = container_of(cache, struct extent_record, cache);
5944         if (!is_extent_tree_record(rec))
5945                 return 0;
5946
5947         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5948         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5949 }
5950
5951 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5952                        struct extent_buffer *buf, int slot)
5953 {
5954         if (btrfs_header_level(buf)) {
5955                 struct btrfs_key_ptr ptr1, ptr2;
5956
5957                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5958                                    sizeof(struct btrfs_key_ptr));
5959                 read_extent_buffer(buf, &ptr2,
5960                                    btrfs_node_key_ptr_offset(slot + 1),
5961                                    sizeof(struct btrfs_key_ptr));
5962                 write_extent_buffer(buf, &ptr1,
5963                                     btrfs_node_key_ptr_offset(slot + 1),
5964                                     sizeof(struct btrfs_key_ptr));
5965                 write_extent_buffer(buf, &ptr2,
5966                                     btrfs_node_key_ptr_offset(slot),
5967                                     sizeof(struct btrfs_key_ptr));
5968                 if (slot == 0) {
5969                         struct btrfs_disk_key key;
5970                         btrfs_node_key(buf, &key, 0);
5971                         btrfs_fixup_low_keys(root, path, &key,
5972                                              btrfs_header_level(buf) + 1);
5973                 }
5974         } else {
5975                 struct btrfs_item *item1, *item2;
5976                 struct btrfs_key k1, k2;
5977                 char *item1_data, *item2_data;
5978                 u32 item1_offset, item2_offset, item1_size, item2_size;
5979
5980                 item1 = btrfs_item_nr(slot);
5981                 item2 = btrfs_item_nr(slot + 1);
5982                 btrfs_item_key_to_cpu(buf, &k1, slot);
5983                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5984                 item1_offset = btrfs_item_offset(buf, item1);
5985                 item2_offset = btrfs_item_offset(buf, item2);
5986                 item1_size = btrfs_item_size(buf, item1);
5987                 item2_size = btrfs_item_size(buf, item2);
5988
5989                 item1_data = malloc(item1_size);
5990                 if (!item1_data)
5991                         return -ENOMEM;
5992                 item2_data = malloc(item2_size);
5993                 if (!item2_data) {
5994                         free(item1_data);
5995                         return -ENOMEM;
5996                 }
5997
5998                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5999                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6000
6001                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6002                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6003                 free(item1_data);
6004                 free(item2_data);
6005
6006                 btrfs_set_item_offset(buf, item1, item2_offset);
6007                 btrfs_set_item_offset(buf, item2, item1_offset);
6008                 btrfs_set_item_size(buf, item1, item2_size);
6009                 btrfs_set_item_size(buf, item2, item1_size);
6010
6011                 path->slots[0] = slot;
6012                 btrfs_set_item_key_unsafe(root, path, &k2);
6013                 path->slots[0] = slot + 1;
6014                 btrfs_set_item_key_unsafe(root, path, &k1);
6015         }
6016         return 0;
6017 }
6018
6019 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6020 {
6021         struct extent_buffer *buf;
6022         struct btrfs_key k1, k2;
6023         int i;
6024         int level = path->lowest_level;
6025         int ret = -EIO;
6026
6027         buf = path->nodes[level];
6028         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6029                 if (level) {
6030                         btrfs_node_key_to_cpu(buf, &k1, i);
6031                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6032                 } else {
6033                         btrfs_item_key_to_cpu(buf, &k1, i);
6034                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6035                 }
6036                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6037                         continue;
6038                 ret = swap_values(root, path, buf, i);
6039                 if (ret)
6040                         break;
6041                 btrfs_mark_buffer_dirty(buf);
6042                 i = 0;
6043         }
6044         return ret;
6045 }
6046
6047 static int delete_bogus_item(struct btrfs_root *root,
6048                              struct btrfs_path *path,
6049                              struct extent_buffer *buf, int slot)
6050 {
6051         struct btrfs_key key;
6052         int nritems = btrfs_header_nritems(buf);
6053
6054         btrfs_item_key_to_cpu(buf, &key, slot);
6055
6056         /* These are all the keys we can deal with missing. */
6057         if (key.type != BTRFS_DIR_INDEX_KEY &&
6058             key.type != BTRFS_EXTENT_ITEM_KEY &&
6059             key.type != BTRFS_METADATA_ITEM_KEY &&
6060             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6061             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6062                 return -1;
6063
6064         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6065                (unsigned long long)key.objectid, key.type,
6066                (unsigned long long)key.offset, slot, buf->start);
6067         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6068                               btrfs_item_nr_offset(slot + 1),
6069                               sizeof(struct btrfs_item) *
6070                               (nritems - slot - 1));
6071         btrfs_set_header_nritems(buf, nritems - 1);
6072         if (slot == 0) {
6073                 struct btrfs_disk_key disk_key;
6074
6075                 btrfs_item_key(buf, &disk_key, 0);
6076                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6077         }
6078         btrfs_mark_buffer_dirty(buf);
6079         return 0;
6080 }
6081
6082 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6083 {
6084         struct extent_buffer *buf;
6085         int i;
6086         int ret = 0;
6087
6088         /* We should only get this for leaves */
6089         BUG_ON(path->lowest_level);
6090         buf = path->nodes[0];
6091 again:
6092         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6093                 unsigned int shift = 0, offset;
6094
6095                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6096                     BTRFS_LEAF_DATA_SIZE(root)) {
6097                         if (btrfs_item_end_nr(buf, i) >
6098                             BTRFS_LEAF_DATA_SIZE(root)) {
6099                                 ret = delete_bogus_item(root, path, buf, i);
6100                                 if (!ret)
6101                                         goto again;
6102                                 fprintf(stderr, "item is off the end of the "
6103                                         "leaf, can't fix\n");
6104                                 ret = -EIO;
6105                                 break;
6106                         }
6107                         shift = BTRFS_LEAF_DATA_SIZE(root) -
6108                                 btrfs_item_end_nr(buf, i);
6109                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6110                            btrfs_item_offset_nr(buf, i - 1)) {
6111                         if (btrfs_item_end_nr(buf, i) >
6112                             btrfs_item_offset_nr(buf, i - 1)) {
6113                                 ret = delete_bogus_item(root, path, buf, i);
6114                                 if (!ret)
6115                                         goto again;
6116                                 fprintf(stderr, "items overlap, can't fix\n");
6117                                 ret = -EIO;
6118                                 break;
6119                         }
6120                         shift = btrfs_item_offset_nr(buf, i - 1) -
6121                                 btrfs_item_end_nr(buf, i);
6122                 }
6123                 if (!shift)
6124                         continue;
6125
6126                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6127                        i, shift, (unsigned long long)buf->start);
6128                 offset = btrfs_item_offset_nr(buf, i);
6129                 memmove_extent_buffer(buf,
6130                                       btrfs_leaf_data(buf) + offset + shift,
6131                                       btrfs_leaf_data(buf) + offset,
6132                                       btrfs_item_size_nr(buf, i));
6133                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6134                                       offset + shift);
6135                 btrfs_mark_buffer_dirty(buf);
6136         }
6137
6138         /*
6139          * We may have moved things, in which case we want to exit so we don't
6140          * write those changes out.  Once we have proper abort functionality in
6141          * progs this can be changed to something nicer.
6142          */
6143         BUG_ON(ret);
6144         return ret;
6145 }
6146
6147 /*
6148  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6149  * then just return -EIO.
6150  */
6151 static int try_to_fix_bad_block(struct btrfs_root *root,
6152                                 struct extent_buffer *buf,
6153                                 enum btrfs_tree_block_status status)
6154 {
6155         struct btrfs_trans_handle *trans;
6156         struct ulist *roots;
6157         struct ulist_node *node;
6158         struct btrfs_root *search_root;
6159         struct btrfs_path path;
6160         struct ulist_iterator iter;
6161         struct btrfs_key root_key, key;
6162         int ret;
6163
6164         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6165             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6166                 return -EIO;
6167
6168         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6169         if (ret)
6170                 return -EIO;
6171
6172         btrfs_init_path(&path);
6173         ULIST_ITER_INIT(&iter);
6174         while ((node = ulist_next(roots, &iter))) {
6175                 root_key.objectid = node->val;
6176                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6177                 root_key.offset = (u64)-1;
6178
6179                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6180                 if (IS_ERR(root)) {
6181                         ret = -EIO;
6182                         break;
6183                 }
6184
6185
6186                 trans = btrfs_start_transaction(search_root, 0);
6187                 if (IS_ERR(trans)) {
6188                         ret = PTR_ERR(trans);
6189                         break;
6190                 }
6191
6192                 path.lowest_level = btrfs_header_level(buf);
6193                 path.skip_check_block = 1;
6194                 if (path.lowest_level)
6195                         btrfs_node_key_to_cpu(buf, &key, 0);
6196                 else
6197                         btrfs_item_key_to_cpu(buf, &key, 0);
6198                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6199                 if (ret) {
6200                         ret = -EIO;
6201                         btrfs_commit_transaction(trans, search_root);
6202                         break;
6203                 }
6204                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6205                         ret = fix_key_order(search_root, &path);
6206                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6207                         ret = fix_item_offset(search_root, &path);
6208                 if (ret) {
6209                         btrfs_commit_transaction(trans, search_root);
6210                         break;
6211                 }
6212                 btrfs_release_path(&path);
6213                 btrfs_commit_transaction(trans, search_root);
6214         }
6215         ulist_free(roots);
6216         btrfs_release_path(&path);
6217         return ret;
6218 }
6219
6220 static int check_block(struct btrfs_root *root,
6221                        struct cache_tree *extent_cache,
6222                        struct extent_buffer *buf, u64 flags)
6223 {
6224         struct extent_record *rec;
6225         struct cache_extent *cache;
6226         struct btrfs_key key;
6227         enum btrfs_tree_block_status status;
6228         int ret = 0;
6229         int level;
6230
6231         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6232         if (!cache)
6233                 return 1;
6234         rec = container_of(cache, struct extent_record, cache);
6235         rec->generation = btrfs_header_generation(buf);
6236
6237         level = btrfs_header_level(buf);
6238         if (btrfs_header_nritems(buf) > 0) {
6239
6240                 if (level == 0)
6241                         btrfs_item_key_to_cpu(buf, &key, 0);
6242                 else
6243                         btrfs_node_key_to_cpu(buf, &key, 0);
6244
6245                 rec->info_objectid = key.objectid;
6246         }
6247         rec->info_level = level;
6248
6249         if (btrfs_is_leaf(buf))
6250                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6251         else
6252                 status = btrfs_check_node(root, &rec->parent_key, buf);
6253
6254         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6255                 if (repair)
6256                         status = try_to_fix_bad_block(root, buf, status);
6257                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6258                         ret = -EIO;
6259                         fprintf(stderr, "bad block %llu\n",
6260                                 (unsigned long long)buf->start);
6261                 } else {
6262                         /*
6263                          * Signal to callers we need to start the scan over
6264                          * again since we'll have cowed blocks.
6265                          */
6266                         ret = -EAGAIN;
6267                 }
6268         } else {
6269                 rec->content_checked = 1;
6270                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6271                         rec->owner_ref_checked = 1;
6272                 else {
6273                         ret = check_owner_ref(root, rec, buf);
6274                         if (!ret)
6275                                 rec->owner_ref_checked = 1;
6276                 }
6277         }
6278         if (!ret)
6279                 maybe_free_extent_rec(extent_cache, rec);
6280         return ret;
6281 }
6282
#if 0
/*
 * Compiled-out variant that walks the rec->backrefs list looking for a tree
 * backref.  With a non-zero @parent only full backrefs with that parent
 * match; otherwise only non-full backrefs owned by @root match.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;
		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
6312
6313 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6314                                                 u64 parent, u64 root)
6315 {
6316         struct tree_backref *ref = malloc(sizeof(*ref));
6317
6318         if (!ref)
6319                 return NULL;
6320         memset(&ref->node, 0, sizeof(ref->node));
6321         if (parent > 0) {
6322                 ref->parent = parent;
6323                 ref->node.full_backref = 1;
6324         } else {
6325                 ref->root = root;
6326                 ref->node.full_backref = 0;
6327         }
6328
6329         return ref;
6330 }
6331
#if 0
/*
 * Compiled-out variant that walks the rec->backrefs list looking for a data
 * backref.  With a non-zero @parent only full backrefs with that parent
 * match.  Otherwise a non-full backref must agree on root, owner and offset,
 * and, when both the caller and the entry have found_ref set, on bytes and
 * disk_bytenr as well.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;
		back = to_data_backref(node);

		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		     back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
6370
6371 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6372                                                 u64 parent, u64 root,
6373                                                 u64 owner, u64 offset,
6374                                                 u64 max_size)
6375 {
6376         struct data_backref *ref = malloc(sizeof(*ref));
6377
6378         if (!ref)
6379                 return NULL;
6380         memset(&ref->node, 0, sizeof(ref->node));
6381         ref->node.is_data = 1;
6382
6383         if (parent > 0) {
6384                 ref->parent = parent;
6385                 ref->owner = 0;
6386                 ref->offset = 0;
6387                 ref->node.full_backref = 1;
6388         } else {
6389                 ref->root = root;
6390                 ref->owner = owner;
6391                 ref->offset = offset;
6392                 ref->node.full_backref = 0;
6393         }
6394         ref->bytes = max_size;
6395         ref->found_ref = 0;
6396         ref->num_refs = 0;
6397         if (max_size > rec->max_size)
6398                 rec->max_size = max_size;
6399         return ref;
6400 }
6401
6402 /* Check if the type of extent matches with its chunk */
6403 static void check_extent_type(struct extent_record *rec)
6404 {
6405         struct btrfs_block_group_cache *bg_cache;
6406
6407         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6408         if (!bg_cache)
6409                 return;
6410
6411         /* data extent, check chunk directly*/
6412         if (!rec->metadata) {
6413                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6414                         rec->wrong_chunk_type = 1;
6415                 return;
6416         }
6417
6418         /* metadata extent, check the obvious case first */
6419         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6420                                  BTRFS_BLOCK_GROUP_METADATA))) {
6421                 rec->wrong_chunk_type = 1;
6422                 return;
6423         }
6424
6425         /*
6426          * Check SYSTEM extent, as it's also marked as metadata, we can only
6427          * make sure it's a SYSTEM extent by its backref
6428          */
6429         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6430                 struct extent_backref *node;
6431                 struct tree_backref *tback;
6432                 u64 bg_type;
6433
6434                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6435                 if (node->is_data) {
6436                         /* tree block shouldn't have data backref */
6437                         rec->wrong_chunk_type = 1;
6438                         return;
6439                 }
6440                 tback = container_of(node, struct tree_backref, node);
6441
6442                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6443                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6444                 else
6445                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6446                 if (!(bg_cache->flags & bg_type))
6447                         rec->wrong_chunk_type = 1;
6448         }
6449 }
6450
6451 /*
6452  * Allocate a new extent record, fill default values from @tmpl and insert int
6453  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6454  * the cache, otherwise it fails.
6455  */
6456 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6457                 struct extent_record *tmpl)
6458 {
6459         struct extent_record *rec;
6460         int ret = 0;
6461
6462         BUG_ON(tmpl->max_size == 0);
6463         rec = malloc(sizeof(*rec));
6464         if (!rec)
6465                 return -ENOMEM;
6466         rec->start = tmpl->start;
6467         rec->max_size = tmpl->max_size;
6468         rec->nr = max(tmpl->nr, tmpl->max_size);
6469         rec->found_rec = tmpl->found_rec;
6470         rec->content_checked = tmpl->content_checked;
6471         rec->owner_ref_checked = tmpl->owner_ref_checked;
6472         rec->num_duplicates = 0;
6473         rec->metadata = tmpl->metadata;
6474         rec->flag_block_full_backref = FLAG_UNSET;
6475         rec->bad_full_backref = 0;
6476         rec->crossing_stripes = 0;
6477         rec->wrong_chunk_type = 0;
6478         rec->is_root = tmpl->is_root;
6479         rec->refs = tmpl->refs;
6480         rec->extent_item_refs = tmpl->extent_item_refs;
6481         rec->parent_generation = tmpl->parent_generation;
6482         INIT_LIST_HEAD(&rec->backrefs);
6483         INIT_LIST_HEAD(&rec->dups);
6484         INIT_LIST_HEAD(&rec->list);
6485         rec->backref_tree = RB_ROOT;
6486         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6487         rec->cache.start = tmpl->start;
6488         rec->cache.size = tmpl->nr;
6489         ret = insert_cache_extent(extent_cache, &rec->cache);
6490         if (ret) {
6491                 free(rec);
6492                 return ret;
6493         }
6494         bytes_used += rec->nr;
6495
6496         if (tmpl->metadata)
6497                 rec->crossing_stripes = check_crossing_stripes(global_info,
6498                                 rec->start, global_info->nodesize);
6499         check_extent_type(rec);
6500         return ret;
6501 }
6502
6503 /*
6504  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6505  * some are hints:
6506  * - refs              - if found, increase refs
6507  * - is_root           - if found, set
6508  * - content_checked   - if found, set
6509  * - owner_ref_checked - if found, set
6510  *
6511  * If not found, create a new one, initialize and insert.
6512  */
6513 static int add_extent_rec(struct cache_tree *extent_cache,
6514                 struct extent_record *tmpl)
6515 {
6516         struct extent_record *rec;
6517         struct cache_extent *cache;
6518         int ret = 0;
6519         int dup = 0;
6520
6521         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6522         if (cache) {
6523                 rec = container_of(cache, struct extent_record, cache);
6524                 if (tmpl->refs)
6525                         rec->refs++;
6526                 if (rec->nr == 1)
6527                         rec->nr = max(tmpl->nr, tmpl->max_size);
6528
6529                 /*
6530                  * We need to make sure to reset nr to whatever the extent
6531                  * record says was the real size, this way we can compare it to
6532                  * the backrefs.
6533                  */
6534                 if (tmpl->found_rec) {
6535                         if (tmpl->start != rec->start || rec->found_rec) {
6536                                 struct extent_record *tmp;
6537
6538                                 dup = 1;
6539                                 if (list_empty(&rec->list))
6540                                         list_add_tail(&rec->list,
6541                                                       &duplicate_extents);
6542
6543                                 /*
6544                                  * We have to do this song and dance in case we
6545                                  * find an extent record that falls inside of
6546                                  * our current extent record but does not have
6547                                  * the same objectid.
6548                                  */
6549                                 tmp = malloc(sizeof(*tmp));
6550                                 if (!tmp)
6551                                         return -ENOMEM;
6552                                 tmp->start = tmpl->start;
6553                                 tmp->max_size = tmpl->max_size;
6554                                 tmp->nr = tmpl->nr;
6555                                 tmp->found_rec = 1;
6556                                 tmp->metadata = tmpl->metadata;
6557                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6558                                 INIT_LIST_HEAD(&tmp->list);
6559                                 list_add_tail(&tmp->list, &rec->dups);
6560                                 rec->num_duplicates++;
6561                         } else {
6562                                 rec->nr = tmpl->nr;
6563                                 rec->found_rec = 1;
6564                         }
6565                 }
6566
6567                 if (tmpl->extent_item_refs && !dup) {
6568                         if (rec->extent_item_refs) {
6569                                 fprintf(stderr, "block %llu rec "
6570                                         "extent_item_refs %llu, passed %llu\n",
6571                                         (unsigned long long)tmpl->start,
6572                                         (unsigned long long)
6573                                                         rec->extent_item_refs,
6574                                         (unsigned long long)tmpl->extent_item_refs);
6575                         }
6576                         rec->extent_item_refs = tmpl->extent_item_refs;
6577                 }
6578                 if (tmpl->is_root)
6579                         rec->is_root = 1;
6580                 if (tmpl->content_checked)
6581                         rec->content_checked = 1;
6582                 if (tmpl->owner_ref_checked)
6583                         rec->owner_ref_checked = 1;
6584                 memcpy(&rec->parent_key, &tmpl->parent_key,
6585                                 sizeof(tmpl->parent_key));
6586                 if (tmpl->parent_generation)
6587                         rec->parent_generation = tmpl->parent_generation;
6588                 if (rec->max_size < tmpl->max_size)
6589                         rec->max_size = tmpl->max_size;
6590
6591                 /*
6592                  * A metadata extent can't cross stripe_len boundary, otherwise
6593                  * kernel scrub won't be able to handle it.
6594                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6595                  * it.
6596                  */
6597                 if (tmpl->metadata)
6598                         rec->crossing_stripes = check_crossing_stripes(
6599                                         global_info, rec->start,
6600                                         global_info->nodesize);
6601                 check_extent_type(rec);
6602                 maybe_free_extent_rec(extent_cache, rec);
6603                 return ret;
6604         }
6605
6606         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6607
6608         return ret;
6609 }
6610
6611 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6612                             u64 parent, u64 root, int found_ref)
6613 {
6614         struct extent_record *rec;
6615         struct tree_backref *back;
6616         struct cache_extent *cache;
6617         int ret;
6618         bool insert = false;
6619
6620         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6621         if (!cache) {
6622                 struct extent_record tmpl;
6623
6624                 memset(&tmpl, 0, sizeof(tmpl));
6625                 tmpl.start = bytenr;
6626                 tmpl.nr = 1;
6627                 tmpl.metadata = 1;
6628                 tmpl.max_size = 1;
6629
6630                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6631                 if (ret)
6632                         return ret;
6633
6634                 /* really a bug in cache_extent implement now */
6635                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6636                 if (!cache)
6637                         return -ENOENT;
6638         }
6639
6640         rec = container_of(cache, struct extent_record, cache);
6641         if (rec->start != bytenr) {
6642                 /*
6643                  * Several cause, from unaligned bytenr to over lapping extents
6644                  */
6645                 return -EEXIST;
6646         }
6647
6648         back = find_tree_backref(rec, parent, root);
6649         if (!back) {
6650                 back = alloc_tree_backref(rec, parent, root);
6651                 if (!back)
6652                         return -ENOMEM;
6653                 insert = true;
6654         }
6655
6656         if (found_ref) {
6657                 if (back->node.found_ref) {
6658                         fprintf(stderr, "Extent back ref already exists "
6659                                 "for %llu parent %llu root %llu \n",
6660                                 (unsigned long long)bytenr,
6661                                 (unsigned long long)parent,
6662                                 (unsigned long long)root);
6663                 }
6664                 back->node.found_ref = 1;
6665         } else {
6666                 if (back->node.found_extent_tree) {
6667                         fprintf(stderr, "Extent back ref already exists "
6668                                 "for %llu parent %llu root %llu \n",
6669                                 (unsigned long long)bytenr,
6670                                 (unsigned long long)parent,
6671                                 (unsigned long long)root);
6672                 }
6673                 back->node.found_extent_tree = 1;
6674         }
6675         if (insert)
6676                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6677                         compare_extent_backref));
6678         check_extent_type(rec);
6679         maybe_free_extent_rec(extent_cache, rec);
6680         return 0;
6681 }
6682
6683 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6684                             u64 parent, u64 root, u64 owner, u64 offset,
6685                             u32 num_refs, int found_ref, u64 max_size)
6686 {
6687         struct extent_record *rec;
6688         struct data_backref *back;
6689         struct cache_extent *cache;
6690         int ret;
6691         bool insert = false;
6692
6693         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6694         if (!cache) {
6695                 struct extent_record tmpl;
6696
6697                 memset(&tmpl, 0, sizeof(tmpl));
6698                 tmpl.start = bytenr;
6699                 tmpl.nr = 1;
6700                 tmpl.max_size = max_size;
6701
6702                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6703                 if (ret)
6704                         return ret;
6705
6706                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6707                 if (!cache)
6708                         abort();
6709         }
6710
6711         rec = container_of(cache, struct extent_record, cache);
6712         if (rec->max_size < max_size)
6713                 rec->max_size = max_size;
6714
6715         /*
6716          * If found_ref is set then max_size is the real size and must match the
6717          * existing refs.  So if we have already found a ref then we need to
6718          * make sure that this ref matches the existing one, otherwise we need
6719          * to add a new backref so we can notice that the backrefs don't match
6720          * and we need to figure out who is telling the truth.  This is to
6721          * account for that awful fsync bug I introduced where we'd end up with
6722          * a btrfs_file_extent_item that would have its length include multiple
6723          * prealloc extents or point inside of a prealloc extent.
6724          */
6725         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6726                                  bytenr, max_size);
6727         if (!back) {
6728                 back = alloc_data_backref(rec, parent, root, owner, offset,
6729                                           max_size);
6730                 BUG_ON(!back);
6731                 insert = true;
6732         }
6733
6734         if (found_ref) {
6735                 BUG_ON(num_refs != 1);
6736                 if (back->node.found_ref)
6737                         BUG_ON(back->bytes != max_size);
6738                 back->node.found_ref = 1;
6739                 back->found_ref += 1;
6740                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6741                         back->bytes = max_size;
6742                         back->disk_bytenr = bytenr;
6743
6744                         /* Need to reinsert if not already in the tree */
6745                         if (!insert) {
6746                                 rb_erase(&back->node.node, &rec->backref_tree);
6747                                 insert = true;
6748                         }
6749                 }
6750                 rec->refs += 1;
6751                 rec->content_checked = 1;
6752                 rec->owner_ref_checked = 1;
6753         } else {
6754                 if (back->node.found_extent_tree) {
6755                         fprintf(stderr, "Extent back ref already exists "
6756                                 "for %llu parent %llu root %llu "
6757                                 "owner %llu offset %llu num_refs %lu\n",
6758                                 (unsigned long long)bytenr,
6759                                 (unsigned long long)parent,
6760                                 (unsigned long long)root,
6761                                 (unsigned long long)owner,
6762                                 (unsigned long long)offset,
6763                                 (unsigned long)num_refs);
6764                 }
6765                 back->num_refs = num_refs;
6766                 back->node.found_extent_tree = 1;
6767         }
6768         if (insert)
6769                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6770                         compare_extent_backref));
6771
6772         maybe_free_extent_rec(extent_cache, rec);
6773         return 0;
6774 }
6775
6776 static int add_pending(struct cache_tree *pending,
6777                        struct cache_tree *seen, u64 bytenr, u32 size)
6778 {
6779         int ret;
6780         ret = add_cache_extent(seen, bytenr, size);
6781         if (ret)
6782                 return ret;
6783         add_cache_extent(pending, bytenr, size);
6784         return 0;
6785 }
6786
6787 static int pick_next_pending(struct cache_tree *pending,
6788                         struct cache_tree *reada,
6789                         struct cache_tree *nodes,
6790                         u64 last, struct block_info *bits, int bits_nr,
6791                         int *reada_bits)
6792 {
6793         unsigned long node_start = last;
6794         struct cache_extent *cache;
6795         int ret;
6796
6797         cache = search_cache_extent(reada, 0);
6798         if (cache) {
6799                 bits[0].start = cache->start;
6800                 bits[0].size = cache->size;
6801                 *reada_bits = 1;
6802                 return 1;
6803         }
6804         *reada_bits = 0;
6805         if (node_start > 32768)
6806                 node_start -= 32768;
6807
6808         cache = search_cache_extent(nodes, node_start);
6809         if (!cache)
6810                 cache = search_cache_extent(nodes, 0);
6811
6812         if (!cache) {
6813                  cache = search_cache_extent(pending, 0);
6814                  if (!cache)
6815                          return 0;
6816                  ret = 0;
6817                  do {
6818                          bits[ret].start = cache->start;
6819                          bits[ret].size = cache->size;
6820                          cache = next_cache_extent(cache);
6821                          ret++;
6822                  } while (cache && ret < bits_nr);
6823                  return ret;
6824         }
6825
6826         ret = 0;
6827         do {
6828                 bits[ret].start = cache->start;
6829                 bits[ret].size = cache->size;
6830                 cache = next_cache_extent(cache);
6831                 ret++;
6832         } while (cache && ret < bits_nr);
6833
6834         if (bits_nr - ret > 8) {
6835                 u64 lookup = bits[0].start + bits[0].size;
6836                 struct cache_extent *next;
6837                 next = search_cache_extent(pending, lookup);
6838                 while(next) {
6839                         if (next->start - lookup > 32768)
6840                                 break;
6841                         bits[ret].start = next->start;
6842                         bits[ret].size = next->size;
6843                         lookup = next->start + next->size;
6844                         ret++;
6845                         if (ret == bits_nr)
6846                                 break;
6847                         next = next_cache_extent(next);
6848                         if (!next)
6849                                 break;
6850                 }
6851         }
6852         return ret;
6853 }
6854
6855 static void free_chunk_record(struct cache_extent *cache)
6856 {
6857         struct chunk_record *rec;
6858
6859         rec = container_of(cache, struct chunk_record, cache);
6860         list_del_init(&rec->list);
6861         list_del_init(&rec->dextents);
6862         free(rec);
6863 }
6864
6865 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6866 {
6867         cache_tree_free_extents(chunk_cache, free_chunk_record);
6868 }
6869
6870 static void free_device_record(struct rb_node *node)
6871 {
6872         struct device_record *rec;
6873
6874         rec = container_of(node, struct device_record, node);
6875         free(rec);
6876 }
6877
6878 FREE_RB_BASED_TREE(device_cache, free_device_record);
6879
6880 int insert_block_group_record(struct block_group_tree *tree,
6881                               struct block_group_record *bg_rec)
6882 {
6883         int ret;
6884
6885         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6886         if (ret)
6887                 return ret;
6888
6889         list_add_tail(&bg_rec->list, &tree->block_groups);
6890         return 0;
6891 }
6892
6893 static void free_block_group_record(struct cache_extent *cache)
6894 {
6895         struct block_group_record *rec;
6896
6897         rec = container_of(cache, struct block_group_record, cache);
6898         list_del_init(&rec->list);
6899         free(rec);
6900 }
6901
6902 void free_block_group_tree(struct block_group_tree *tree)
6903 {
6904         cache_tree_free_extents(&tree->tree, free_block_group_record);
6905 }
6906
6907 int insert_device_extent_record(struct device_extent_tree *tree,
6908                                 struct device_extent_record *de_rec)
6909 {
6910         int ret;
6911
6912         /*
6913          * Device extent is a bit different from the other extents, because
6914          * the extents which belong to the different devices may have the
6915          * same start and size, so we need use the special extent cache
6916          * search/insert functions.
6917          */
6918         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6919         if (ret)
6920                 return ret;
6921
6922         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6923         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6924         return 0;
6925 }
6926
6927 static void free_device_extent_record(struct cache_extent *cache)
6928 {
6929         struct device_extent_record *rec;
6930
6931         rec = container_of(cache, struct device_extent_record, cache);
6932         if (!list_empty(&rec->chunk_list))
6933                 list_del_init(&rec->chunk_list);
6934         if (!list_empty(&rec->device_list))
6935                 list_del_init(&rec->device_list);
6936         free(rec);
6937 }
6938
6939 void free_device_extent_tree(struct device_extent_tree *tree)
6940 {
6941         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6942 }
6943
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the backref described by the v0 extent ref item at @slot of @leaf.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref; anything else is recorded as a data backref carrying the
 * ref count stored in the item.  In both cases key.objectid and key.offset
 * of the item are passed as the first two identifying arguments.
 *
 * Returns whatever add_tree_backref() / add_data_backref() returned.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
6964
6965 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6966                                             struct btrfs_key *key,
6967                                             int slot)
6968 {
6969         struct btrfs_chunk *ptr;
6970         struct chunk_record *rec;
6971         int num_stripes, i;
6972
6973         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6974         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6975
6976         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6977         if (!rec) {
6978                 fprintf(stderr, "memory allocation failed\n");
6979                 exit(-1);
6980         }
6981
6982         INIT_LIST_HEAD(&rec->list);
6983         INIT_LIST_HEAD(&rec->dextents);
6984         rec->bg_rec = NULL;
6985
6986         rec->cache.start = key->offset;
6987         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6988
6989         rec->generation = btrfs_header_generation(leaf);
6990
6991         rec->objectid = key->objectid;
6992         rec->type = key->type;
6993         rec->offset = key->offset;
6994
6995         rec->length = rec->cache.size;
6996         rec->owner = btrfs_chunk_owner(leaf, ptr);
6997         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6998         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6999         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7000         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7001         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7002         rec->num_stripes = num_stripes;
7003         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7004
7005         for (i = 0; i < rec->num_stripes; ++i) {
7006                 rec->stripes[i].devid =
7007                         btrfs_stripe_devid_nr(leaf, ptr, i);
7008                 rec->stripes[i].offset =
7009                         btrfs_stripe_offset_nr(leaf, ptr, i);
7010                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7011                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7012                                 BTRFS_UUID_SIZE);
7013         }
7014
7015         return rec;
7016 }
7017
7018 static int process_chunk_item(struct cache_tree *chunk_cache,
7019                               struct btrfs_key *key, struct extent_buffer *eb,
7020                               int slot)
7021 {
7022         struct chunk_record *rec;
7023         struct btrfs_chunk *chunk;
7024         int ret = 0;
7025
7026         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7027         /*
7028          * Do extra check for this chunk item,
7029          *
7030          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7031          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7032          * and owner<->key_type check.
7033          */
7034         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7035                                       key->offset);
7036         if (ret < 0) {
7037                 error("chunk(%llu, %llu) is not valid, ignore it",
7038                       key->offset, btrfs_chunk_length(eb, chunk));
7039                 return 0;
7040         }
7041         rec = btrfs_new_chunk_record(eb, key, slot);
7042         ret = insert_cache_extent(chunk_cache, &rec->cache);
7043         if (ret) {
7044                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7045                         rec->offset, rec->length);
7046                 free(rec);
7047         }
7048
7049         return ret;
7050 }
7051
7052 static int process_device_item(struct rb_root *dev_cache,
7053                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7054 {
7055         struct btrfs_dev_item *ptr;
7056         struct device_record *rec;
7057         int ret = 0;
7058
7059         ptr = btrfs_item_ptr(eb,
7060                 slot, struct btrfs_dev_item);
7061
7062         rec = malloc(sizeof(*rec));
7063         if (!rec) {
7064                 fprintf(stderr, "memory allocation failed\n");
7065                 return -ENOMEM;
7066         }
7067
7068         rec->devid = key->offset;
7069         rec->generation = btrfs_header_generation(eb);
7070
7071         rec->objectid = key->objectid;
7072         rec->type = key->type;
7073         rec->offset = key->offset;
7074
7075         rec->devid = btrfs_device_id(eb, ptr);
7076         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7077         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7078
7079         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7080         if (ret) {
7081                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7082                 free(rec);
7083         }
7084
7085         return ret;
7086 }
7087
7088 struct block_group_record *
7089 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7090                              int slot)
7091 {
7092         struct btrfs_block_group_item *ptr;
7093         struct block_group_record *rec;
7094
7095         rec = calloc(1, sizeof(*rec));
7096         if (!rec) {
7097                 fprintf(stderr, "memory allocation failed\n");
7098                 exit(-1);
7099         }
7100
7101         rec->cache.start = key->objectid;
7102         rec->cache.size = key->offset;
7103
7104         rec->generation = btrfs_header_generation(leaf);
7105
7106         rec->objectid = key->objectid;
7107         rec->type = key->type;
7108         rec->offset = key->offset;
7109
7110         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7111         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7112
7113         INIT_LIST_HEAD(&rec->list);
7114
7115         return rec;
7116 }
7117
7118 static int process_block_group_item(struct block_group_tree *block_group_cache,
7119                                     struct btrfs_key *key,
7120                                     struct extent_buffer *eb, int slot)
7121 {
7122         struct block_group_record *rec;
7123         int ret = 0;
7124
7125         rec = btrfs_new_block_group_record(eb, key, slot);
7126         ret = insert_block_group_record(block_group_cache, rec);
7127         if (ret) {
7128                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7129                         rec->objectid, rec->offset);
7130                 free(rec);
7131         }
7132
7133         return ret;
7134 }
7135
7136 struct device_extent_record *
7137 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7138                                struct btrfs_key *key, int slot)
7139 {
7140         struct device_extent_record *rec;
7141         struct btrfs_dev_extent *ptr;
7142
7143         rec = calloc(1, sizeof(*rec));
7144         if (!rec) {
7145                 fprintf(stderr, "memory allocation failed\n");
7146                 exit(-1);
7147         }
7148
7149         rec->cache.objectid = key->objectid;
7150         rec->cache.start = key->offset;
7151
7152         rec->generation = btrfs_header_generation(leaf);
7153
7154         rec->objectid = key->objectid;
7155         rec->type = key->type;
7156         rec->offset = key->offset;
7157
7158         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7159         rec->chunk_objecteid =
7160                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7161         rec->chunk_offset =
7162                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7163         rec->length = btrfs_dev_extent_length(leaf, ptr);
7164         rec->cache.size = rec->length;
7165
7166         INIT_LIST_HEAD(&rec->chunk_list);
7167         INIT_LIST_HEAD(&rec->device_list);
7168
7169         return rec;
7170 }
7171
7172 static int
7173 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7174                            struct btrfs_key *key, struct extent_buffer *eb,
7175                            int slot)
7176 {
7177         struct device_extent_record *rec;
7178         int ret;
7179
7180         rec = btrfs_new_device_extent_record(eb, key, slot);
7181         ret = insert_device_extent_record(dev_extent_cache, rec);
7182         if (ret) {
7183                 fprintf(stderr,
7184                         "Device extent[%llu, %llu, %llu] existed.\n",
7185                         rec->objectid, rec->offset, rec->length);
7186                 free(rec);
7187         }
7188
7189         return ret;
7190 }
7191
7192 static int process_extent_item(struct btrfs_root *root,
7193                                struct cache_tree *extent_cache,
7194                                struct extent_buffer *eb, int slot)
7195 {
7196         struct btrfs_extent_item *ei;
7197         struct btrfs_extent_inline_ref *iref;
7198         struct btrfs_extent_data_ref *dref;
7199         struct btrfs_shared_data_ref *sref;
7200         struct btrfs_key key;
7201         struct extent_record tmpl;
7202         unsigned long end;
7203         unsigned long ptr;
7204         int ret;
7205         int type;
7206         u32 item_size = btrfs_item_size_nr(eb, slot);
7207         u64 refs = 0;
7208         u64 offset;
7209         u64 num_bytes;
7210         int metadata = 0;
7211
7212         btrfs_item_key_to_cpu(eb, &key, slot);
7213
7214         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7215                 metadata = 1;
7216                 num_bytes = root->fs_info->nodesize;
7217         } else {
7218                 num_bytes = key.offset;
7219         }
7220
7221         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7222                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7223                       key.objectid, root->fs_info->sectorsize);
7224                 return -EIO;
7225         }
7226         if (item_size < sizeof(*ei)) {
7227 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7228                 struct btrfs_extent_item_v0 *ei0;
7229                 BUG_ON(item_size != sizeof(*ei0));
7230                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7231                 refs = btrfs_extent_refs_v0(eb, ei0);
7232 #else
7233                 BUG();
7234 #endif
7235                 memset(&tmpl, 0, sizeof(tmpl));
7236                 tmpl.start = key.objectid;
7237                 tmpl.nr = num_bytes;
7238                 tmpl.extent_item_refs = refs;
7239                 tmpl.metadata = metadata;
7240                 tmpl.found_rec = 1;
7241                 tmpl.max_size = num_bytes;
7242
7243                 return add_extent_rec(extent_cache, &tmpl);
7244         }
7245
7246         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7247         refs = btrfs_extent_refs(eb, ei);
7248         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7249                 metadata = 1;
7250         else
7251                 metadata = 0;
7252         if (metadata && num_bytes != root->fs_info->nodesize) {
7253                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7254                       num_bytes, root->fs_info->nodesize);
7255                 return -EIO;
7256         }
7257         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7258                 error("ignore invalid data extent, length %llu is not aligned to %u",
7259                       num_bytes, root->fs_info->sectorsize);
7260                 return -EIO;
7261         }
7262
7263         memset(&tmpl, 0, sizeof(tmpl));
7264         tmpl.start = key.objectid;
7265         tmpl.nr = num_bytes;
7266         tmpl.extent_item_refs = refs;
7267         tmpl.metadata = metadata;
7268         tmpl.found_rec = 1;
7269         tmpl.max_size = num_bytes;
7270         add_extent_rec(extent_cache, &tmpl);
7271
7272         ptr = (unsigned long)(ei + 1);
7273         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7274             key.type == BTRFS_EXTENT_ITEM_KEY)
7275                 ptr += sizeof(struct btrfs_tree_block_info);
7276
7277         end = (unsigned long)ei + item_size;
7278         while (ptr < end) {
7279                 iref = (struct btrfs_extent_inline_ref *)ptr;
7280                 type = btrfs_extent_inline_ref_type(eb, iref);
7281                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7282                 switch (type) {
7283                 case BTRFS_TREE_BLOCK_REF_KEY:
7284                         ret = add_tree_backref(extent_cache, key.objectid,
7285                                         0, offset, 0);
7286                         if (ret < 0)
7287                                 error(
7288                         "add_tree_backref failed (extent items tree block): %s",
7289                                       strerror(-ret));
7290                         break;
7291                 case BTRFS_SHARED_BLOCK_REF_KEY:
7292                         ret = add_tree_backref(extent_cache, key.objectid,
7293                                         offset, 0, 0);
7294                         if (ret < 0)
7295                                 error(
7296                         "add_tree_backref failed (extent items shared block): %s",
7297                                       strerror(-ret));
7298                         break;
7299                 case BTRFS_EXTENT_DATA_REF_KEY:
7300                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7301                         add_data_backref(extent_cache, key.objectid, 0,
7302                                         btrfs_extent_data_ref_root(eb, dref),
7303                                         btrfs_extent_data_ref_objectid(eb,
7304                                                                        dref),
7305                                         btrfs_extent_data_ref_offset(eb, dref),
7306                                         btrfs_extent_data_ref_count(eb, dref),
7307                                         0, num_bytes);
7308                         break;
7309                 case BTRFS_SHARED_DATA_REF_KEY:
7310                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7311                         add_data_backref(extent_cache, key.objectid, offset,
7312                                         0, 0, 0,
7313                                         btrfs_shared_data_ref_count(eb, sref),
7314                                         0, num_bytes);
7315                         break;
7316                 default:
7317                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7318                                 key.objectid, key.type, num_bytes);
7319                         goto out;
7320                 }
7321                 ptr += btrfs_extent_inline_ref_size(type);
7322         }
7323         WARN_ON(ptr > end);
7324 out:
7325         return 0;
7326 }
7327
7328 static int check_cache_range(struct btrfs_root *root,
7329                              struct btrfs_block_group_cache *cache,
7330                              u64 offset, u64 bytes)
7331 {
7332         struct btrfs_free_space *entry;
7333         u64 *logical;
7334         u64 bytenr;
7335         int stripe_len;
7336         int i, nr, ret;
7337
7338         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7339                 bytenr = btrfs_sb_offset(i);
7340                 ret = btrfs_rmap_block(root->fs_info,
7341                                        cache->key.objectid, bytenr, 0,
7342                                        &logical, &nr, &stripe_len);
7343                 if (ret)
7344                         return ret;
7345
7346                 while (nr--) {
7347                         if (logical[nr] + stripe_len <= offset)
7348                                 continue;
7349                         if (offset + bytes <= logical[nr])
7350                                 continue;
7351                         if (logical[nr] == offset) {
7352                                 if (stripe_len >= bytes) {
7353                                         free(logical);
7354                                         return 0;
7355                                 }
7356                                 bytes -= stripe_len;
7357                                 offset += stripe_len;
7358                         } else if (logical[nr] < offset) {
7359                                 if (logical[nr] + stripe_len >=
7360                                     offset + bytes) {
7361                                         free(logical);
7362                                         return 0;
7363                                 }
7364                                 bytes = (offset + bytes) -
7365                                         (logical[nr] + stripe_len);
7366                                 offset = logical[nr] + stripe_len;
7367                         } else {
7368                                 /*
7369                                  * Could be tricky, the super may land in the
7370                                  * middle of the area we're checking.  First
7371                                  * check the easiest case, it's at the end.
7372                                  */
7373                                 if (logical[nr] + stripe_len >=
7374                                     bytes + offset) {
7375                                         bytes = logical[nr] - offset;
7376                                         continue;
7377                                 }
7378
7379                                 /* Check the left side */
7380                                 ret = check_cache_range(root, cache,
7381                                                         offset,
7382                                                         logical[nr] - offset);
7383                                 if (ret) {
7384                                         free(logical);
7385                                         return ret;
7386                                 }
7387
7388                                 /* Now we continue with the right side */
7389                                 bytes = (offset + bytes) -
7390                                         (logical[nr] + stripe_len);
7391                                 offset = logical[nr] + stripe_len;
7392                         }
7393                 }
7394
7395                 free(logical);
7396         }
7397
7398         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7399         if (!entry) {
7400                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7401                         offset, offset+bytes);
7402                 return -EINVAL;
7403         }
7404
7405         if (entry->offset != offset) {
7406                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7407                         entry->offset);
7408                 return -EINVAL;
7409         }
7410
7411         if (entry->bytes != bytes) {
7412                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7413                         bytes, entry->bytes, offset);
7414                 return -EINVAL;
7415         }
7416
7417         unlink_free_space(cache->free_space_ctl, entry);
7418         free(entry);
7419         return 0;
7420 }
7421
7422 static int verify_space_cache(struct btrfs_root *root,
7423                               struct btrfs_block_group_cache *cache)
7424 {
7425         struct btrfs_path path;
7426         struct extent_buffer *leaf;
7427         struct btrfs_key key;
7428         u64 last;
7429         int ret = 0;
7430
7431         root = root->fs_info->extent_root;
7432
7433         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7434
7435         btrfs_init_path(&path);
7436         key.objectid = last;
7437         key.offset = 0;
7438         key.type = BTRFS_EXTENT_ITEM_KEY;
7439         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7440         if (ret < 0)
7441                 goto out;
7442         ret = 0;
7443         while (1) {
7444                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7445                         ret = btrfs_next_leaf(root, &path);
7446                         if (ret < 0)
7447                                 goto out;
7448                         if (ret > 0) {
7449                                 ret = 0;
7450                                 break;
7451                         }
7452                 }
7453                 leaf = path.nodes[0];
7454                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7455                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7456                         break;
7457                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7458                     key.type != BTRFS_METADATA_ITEM_KEY) {
7459                         path.slots[0]++;
7460                         continue;
7461                 }
7462
7463                 if (last == key.objectid) {
7464                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7465                                 last = key.objectid + key.offset;
7466                         else
7467                                 last = key.objectid + root->fs_info->nodesize;
7468                         path.slots[0]++;
7469                         continue;
7470                 }
7471
7472                 ret = check_cache_range(root, cache, last,
7473                                         key.objectid - last);
7474                 if (ret)
7475                         break;
7476                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7477                         last = key.objectid + key.offset;
7478                 else
7479                         last = key.objectid + root->fs_info->nodesize;
7480                 path.slots[0]++;
7481         }
7482
7483         if (last < cache->key.objectid + cache->key.offset)
7484                 ret = check_cache_range(root, cache, last,
7485                                         cache->key.objectid +
7486                                         cache->key.offset - last);
7487
7488 out:
7489         btrfs_release_path(&path);
7490
7491         if (!ret &&
7492             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7493                 fprintf(stderr, "There are still entries left in the space "
7494                         "cache\n");
7495                 ret = -EINVAL;
7496         }
7497
7498         return ret;
7499 }
7500
7501 static int check_space_cache(struct btrfs_root *root)
7502 {
7503         struct btrfs_block_group_cache *cache;
7504         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7505         int ret;
7506         int error = 0;
7507
7508         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7509             btrfs_super_generation(root->fs_info->super_copy) !=
7510             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7511                 printf("cache and super generation don't match, space cache "
7512                        "will be invalidated\n");
7513                 return 0;
7514         }
7515
7516         if (ctx.progress_enabled) {
7517                 ctx.tp = TASK_FREE_SPACE;
7518                 task_start(ctx.info);
7519         }
7520
7521         while (1) {
7522                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7523                 if (!cache)
7524                         break;
7525
7526                 start = cache->key.objectid + cache->key.offset;
7527                 if (!cache->free_space_ctl) {
7528                         if (btrfs_init_free_space_ctl(cache,
7529                                                 root->fs_info->sectorsize)) {
7530                                 ret = -ENOMEM;
7531                                 break;
7532                         }
7533                 } else {
7534                         btrfs_remove_free_space_cache(cache);
7535                 }
7536
7537                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7538                         ret = exclude_super_stripes(root, cache);
7539                         if (ret) {
7540                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7541                                         strerror(-ret));
7542                                 error++;
7543                                 continue;
7544                         }
7545                         ret = load_free_space_tree(root->fs_info, cache);
7546                         free_excluded_extents(root, cache);
7547                         if (ret < 0) {
7548                                 fprintf(stderr, "could not load free space tree: %s\n",
7549                                         strerror(-ret));
7550                                 error++;
7551                                 continue;
7552                         }
7553                         error += ret;
7554                 } else {
7555                         ret = load_free_space_cache(root->fs_info, cache);
7556                         if (!ret)
7557                                 continue;
7558                 }
7559
7560                 ret = verify_space_cache(root, cache);
7561                 if (ret) {
7562                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7563                                 cache->key.objectid);
7564                         error++;
7565                 }
7566         }
7567
7568         task_stop(ctx.info);
7569
7570         return error ? -EINVAL : 0;
7571 }
7572
7573 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7574                         u64 num_bytes, unsigned long leaf_offset,
7575                         struct extent_buffer *eb) {
7576
7577         struct btrfs_fs_info *fs_info = root->fs_info;
7578         u64 offset = 0;
7579         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7580         char *data;
7581         unsigned long csum_offset;
7582         u32 csum;
7583         u32 csum_expected;
7584         u64 read_len;
7585         u64 data_checked = 0;
7586         u64 tmp;
7587         int ret = 0;
7588         int mirror;
7589         int num_copies;
7590
7591         if (num_bytes % fs_info->sectorsize)
7592                 return -EINVAL;
7593
7594         data = malloc(num_bytes);
7595         if (!data)
7596                 return -ENOMEM;
7597
7598         while (offset < num_bytes) {
7599                 mirror = 0;
7600 again:
7601                 read_len = num_bytes - offset;
7602                 /* read as much space once a time */
7603                 ret = read_extent_data(fs_info, data + offset,
7604                                 bytenr + offset, &read_len, mirror);
7605                 if (ret)
7606                         goto out;
7607                 data_checked = 0;
7608                 /* verify every 4k data's checksum */
7609                 while (data_checked < read_len) {
7610                         csum = ~(u32)0;
7611                         tmp = offset + data_checked;
7612
7613                         csum = btrfs_csum_data((char *)data + tmp,
7614                                                csum, fs_info->sectorsize);
7615                         btrfs_csum_final(csum, (u8 *)&csum);
7616
7617                         csum_offset = leaf_offset +
7618                                  tmp / fs_info->sectorsize * csum_size;
7619                         read_extent_buffer(eb, (char *)&csum_expected,
7620                                            csum_offset, csum_size);
7621                         /* try another mirror */
7622                         if (csum != csum_expected) {
7623                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7624                                                 mirror, bytenr + tmp,
7625                                                 csum, csum_expected);
7626                                 num_copies = btrfs_num_copies(root->fs_info,
7627                                                 bytenr, num_bytes);
7628                                 if (mirror < num_copies - 1) {
7629                                         mirror += 1;
7630                                         goto again;
7631                                 }
7632                         }
7633                         data_checked += fs_info->sectorsize;
7634                 }
7635                 offset += read_len;
7636         }
7637 out:
7638         free(data);
7639         return ret;
7640 }
7641
7642 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7643                                u64 num_bytes)
7644 {
7645         struct btrfs_path path;
7646         struct extent_buffer *leaf;
7647         struct btrfs_key key;
7648         int ret;
7649
7650         btrfs_init_path(&path);
7651         key.objectid = bytenr;
7652         key.type = BTRFS_EXTENT_ITEM_KEY;
7653         key.offset = (u64)-1;
7654
7655 again:
7656         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7657                                 0, 0);
7658         if (ret < 0) {
7659                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7660                 btrfs_release_path(&path);
7661                 return ret;
7662         } else if (ret) {
7663                 if (path.slots[0] > 0) {
7664                         path.slots[0]--;
7665                 } else {
7666                         ret = btrfs_prev_leaf(root, &path);
7667                         if (ret < 0) {
7668                                 goto out;
7669                         } else if (ret > 0) {
7670                                 ret = 0;
7671                                 goto out;
7672                         }
7673                 }
7674         }
7675
7676         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7677
7678         /*
7679          * Block group items come before extent items if they have the same
7680          * bytenr, so walk back one more just in case.  Dear future traveller,
7681          * first congrats on mastering time travel.  Now if it's not too much
7682          * trouble could you go back to 2006 and tell Chris to make the
7683          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7684          * EXTENT_ITEM_KEY please?
7685          */
7686         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7687                 if (path.slots[0] > 0) {
7688                         path.slots[0]--;
7689                 } else {
7690                         ret = btrfs_prev_leaf(root, &path);
7691                         if (ret < 0) {
7692                                 goto out;
7693                         } else if (ret > 0) {
7694                                 ret = 0;
7695                                 goto out;
7696                         }
7697                 }
7698                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7699         }
7700
7701         while (num_bytes) {
7702                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7703                         ret = btrfs_next_leaf(root, &path);
7704                         if (ret < 0) {
7705                                 fprintf(stderr, "Error going to next leaf "
7706                                         "%d\n", ret);
7707                                 btrfs_release_path(&path);
7708                                 return ret;
7709                         } else if (ret) {
7710                                 break;
7711                         }
7712                 }
7713                 leaf = path.nodes[0];
7714                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7715                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7716                         path.slots[0]++;
7717                         continue;
7718                 }
7719                 if (key.objectid + key.offset < bytenr) {
7720                         path.slots[0]++;
7721                         continue;
7722                 }
7723                 if (key.objectid > bytenr + num_bytes)
7724                         break;
7725
7726                 if (key.objectid == bytenr) {
7727                         if (key.offset >= num_bytes) {
7728                                 num_bytes = 0;
7729                                 break;
7730                         }
7731                         num_bytes -= key.offset;
7732                         bytenr += key.offset;
7733                 } else if (key.objectid < bytenr) {
7734                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7735                                 num_bytes = 0;
7736                                 break;
7737                         }
7738                         num_bytes = (bytenr + num_bytes) -
7739                                 (key.objectid + key.offset);
7740                         bytenr = key.objectid + key.offset;
7741                 } else {
7742                         if (key.objectid + key.offset < bytenr + num_bytes) {
7743                                 u64 new_start = key.objectid + key.offset;
7744                                 u64 new_bytes = bytenr + num_bytes - new_start;
7745
7746                                 /*
7747                                  * Weird case, the extent is in the middle of
7748                                  * our range, we'll have to search one side
7749                                  * and then the other.  Not sure if this happens
7750                                  * in real life, but no harm in coding it up
7751                                  * anyway just in case.
7752                                  */
7753                                 btrfs_release_path(&path);
7754                                 ret = check_extent_exists(root, new_start,
7755                                                           new_bytes);
7756                                 if (ret) {
7757                                         fprintf(stderr, "Right section didn't "
7758                                                 "have a record\n");
7759                                         break;
7760                                 }
7761                                 num_bytes = key.objectid - bytenr;
7762                                 goto again;
7763                         }
7764                         num_bytes = key.objectid - bytenr;
7765                 }
7766                 path.slots[0]++;
7767         }
7768         ret = 0;
7769
7770 out:
7771         if (num_bytes && !ret) {
7772                 fprintf(stderr, "There are no extents for csum range "
7773                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7774                 ret = 1;
7775         }
7776
7777         btrfs_release_path(&path);
7778         return ret;
7779 }
7780
7781 static int check_csums(struct btrfs_root *root)
7782 {
7783         struct btrfs_path path;
7784         struct extent_buffer *leaf;
7785         struct btrfs_key key;
7786         u64 offset = 0, num_bytes = 0;
7787         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7788         int errors = 0;
7789         int ret;
7790         u64 data_len;
7791         unsigned long leaf_offset;
7792
7793         root = root->fs_info->csum_root;
7794         if (!extent_buffer_uptodate(root->node)) {
7795                 fprintf(stderr, "No valid csum tree found\n");
7796                 return -ENOENT;
7797         }
7798
7799         btrfs_init_path(&path);
7800         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7801         key.type = BTRFS_EXTENT_CSUM_KEY;
7802         key.offset = 0;
7803         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7804         if (ret < 0) {
7805                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7806                 btrfs_release_path(&path);
7807                 return ret;
7808         }
7809
7810         if (ret > 0 && path.slots[0])
7811                 path.slots[0]--;
7812         ret = 0;
7813
7814         while (1) {
7815                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7816                         ret = btrfs_next_leaf(root, &path);
7817                         if (ret < 0) {
7818                                 fprintf(stderr, "Error going to next leaf "
7819                                         "%d\n", ret);
7820                                 break;
7821                         }
7822                         if (ret)
7823                                 break;
7824                 }
7825                 leaf = path.nodes[0];
7826
7827                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7828                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7829                         path.slots[0]++;
7830                         continue;
7831                 }
7832
7833                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7834                               csum_size) * root->fs_info->sectorsize;
7835                 if (!check_data_csum)
7836                         goto skip_csum_check;
7837                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7838                 ret = check_extent_csums(root, key.offset, data_len,
7839                                          leaf_offset, leaf);
7840                 if (ret)
7841                         break;
7842 skip_csum_check:
7843                 if (!num_bytes) {
7844                         offset = key.offset;
7845                 } else if (key.offset != offset + num_bytes) {
7846                         ret = check_extent_exists(root, offset, num_bytes);
7847                         if (ret) {
7848                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7849                                         "there is no extent record\n",
7850                                         offset, offset+num_bytes);
7851                                 errors++;
7852                         }
7853                         offset = key.offset;
7854                         num_bytes = 0;
7855                 }
7856                 num_bytes += data_len;
7857                 path.slots[0]++;
7858         }
7859
7860         btrfs_release_path(&path);
7861         return errors;
7862 }
7863
7864 static int is_dropped_key(struct btrfs_key *key,
7865                           struct btrfs_key *drop_key) {
7866         if (key->objectid < drop_key->objectid)
7867                 return 1;
7868         else if (key->objectid == drop_key->objectid) {
7869                 if (key->type < drop_key->type)
7870                         return 1;
7871                 else if (key->type == drop_key->type) {
7872                         if (key->offset < drop_key->offset)
7873                                 return 1;
7874                 }
7875         }
7876         return 0;
7877 }
7878
7879 /*
7880  * Here are the rules for FULL_BACKREF.
7881  *
7882  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7883  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7884  *      FULL_BACKREF set.
7885  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7886  *    if it happened after the relocation occurred since we'll have dropped the
7887  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7888  *    have no real way to know for sure.
7889  *
7890  * We process the blocks one root at a time, and we start from the lowest root
7891  * objectid and go to the highest.  So we can just lookup the owner backref for
7892  * the record and if we don't find it then we know it doesn't exist and we have
7893  * a FULL BACKREF.
7894  *
7895  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7896  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7897  * be set or not and then we can check later once we've gathered all the refs.
7898  */
7899 static int calc_extent_flag(struct cache_tree *extent_cache,
7900                            struct extent_buffer *buf,
7901                            struct root_item_record *ri,
7902                            u64 *flags)
7903 {
7904         struct extent_record *rec;
7905         struct cache_extent *cache;
7906         struct tree_backref *tback;
7907         u64 owner = 0;
7908
7909         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7910         /* we have added this extent before */
7911         if (!cache)
7912                 return -ENOENT;
7913
7914         rec = container_of(cache, struct extent_record, cache);
7915
7916         /*
7917          * Except file/reloc tree, we can not have
7918          * FULL BACKREF MODE
7919          */
7920         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7921                 goto normal;
7922         /*
7923          * root node
7924          */
7925         if (buf->start == ri->bytenr)
7926                 goto normal;
7927
7928         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7929                 goto full_backref;
7930
7931         owner = btrfs_header_owner(buf);
7932         if (owner == ri->objectid)
7933                 goto normal;
7934
7935         tback = find_tree_backref(rec, 0, owner);
7936         if (!tback)
7937                 goto full_backref;
7938 normal:
7939         *flags = 0;
7940         if (rec->flag_block_full_backref != FLAG_UNSET &&
7941             rec->flag_block_full_backref != 0)
7942                 rec->bad_full_backref = 1;
7943         return 0;
7944 full_backref:
7945         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7946         if (rec->flag_block_full_backref != FLAG_UNSET &&
7947             rec->flag_block_full_backref != 1)
7948                 rec->bad_full_backref = 1;
7949         return 0;
7950 }
7951
7952 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7953 {
7954         fprintf(stderr, "Invalid key type(");
7955         print_key_type(stderr, 0, key_type);
7956         fprintf(stderr, ") found in root(");
7957         print_objectid(stderr, rootid, 0);
7958         fprintf(stderr, ")\n");
7959 }
7960
7961 /*
7962  * Check if the key is valid with its extent buffer.
7963  *
7964  * This is a early check in case invalid key exists in a extent buffer
7965  * This is not comprehensive yet, but should prevent wrong key/item passed
7966  * further
7967  */
7968 static int check_type_with_root(u64 rootid, u8 key_type)
7969 {
7970         switch (key_type) {
7971         /* Only valid in chunk tree */
7972         case BTRFS_DEV_ITEM_KEY:
7973         case BTRFS_CHUNK_ITEM_KEY:
7974                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7975                         goto err;
7976                 break;
7977         /* valid in csum and log tree */
7978         case BTRFS_CSUM_TREE_OBJECTID:
7979                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7980                       is_fstree(rootid)))
7981                         goto err;
7982                 break;
7983         case BTRFS_EXTENT_ITEM_KEY:
7984         case BTRFS_METADATA_ITEM_KEY:
7985         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7986                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7987                         goto err;
7988                 break;
7989         case BTRFS_ROOT_ITEM_KEY:
7990                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7991                         goto err;
7992                 break;
7993         case BTRFS_DEV_EXTENT_KEY:
7994                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7995                         goto err;
7996                 break;
7997         }
7998         return 0;
7999 err:
8000         report_mismatch_key_root(key_type, rootid);
8001         return -EINVAL;
8002 }
8003
8004 static int run_next_block(struct btrfs_root *root,
8005                           struct block_info *bits,
8006                           int bits_nr,
8007                           u64 *last,
8008                           struct cache_tree *pending,
8009                           struct cache_tree *seen,
8010                           struct cache_tree *reada,
8011                           struct cache_tree *nodes,
8012                           struct cache_tree *extent_cache,
8013                           struct cache_tree *chunk_cache,
8014                           struct rb_root *dev_cache,
8015                           struct block_group_tree *block_group_cache,
8016                           struct device_extent_tree *dev_extent_cache,
8017                           struct root_item_record *ri)
8018 {
8019         struct btrfs_fs_info *fs_info = root->fs_info;
8020         struct extent_buffer *buf;
8021         struct extent_record *rec = NULL;
8022         u64 bytenr;
8023         u32 size;
8024         u64 parent;
8025         u64 owner;
8026         u64 flags;
8027         u64 ptr;
8028         u64 gen = 0;
8029         int ret = 0;
8030         int i;
8031         int nritems;
8032         struct btrfs_key key;
8033         struct cache_extent *cache;
8034         int reada_bits;
8035
8036         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8037                                     bits_nr, &reada_bits);
8038         if (nritems == 0)
8039                 return 1;
8040
8041         if (!reada_bits) {
8042                 for(i = 0; i < nritems; i++) {
8043                         ret = add_cache_extent(reada, bits[i].start,
8044                                                bits[i].size);
8045                         if (ret == -EEXIST)
8046                                 continue;
8047
8048                         /* fixme, get the parent transid */
8049                         readahead_tree_block(fs_info, bits[i].start, 0);
8050                 }
8051         }
8052         *last = bits[0].start;
8053         bytenr = bits[0].start;
8054         size = bits[0].size;
8055
8056         cache = lookup_cache_extent(pending, bytenr, size);
8057         if (cache) {
8058                 remove_cache_extent(pending, cache);
8059                 free(cache);
8060         }
8061         cache = lookup_cache_extent(reada, bytenr, size);
8062         if (cache) {
8063                 remove_cache_extent(reada, cache);
8064                 free(cache);
8065         }
8066         cache = lookup_cache_extent(nodes, bytenr, size);
8067         if (cache) {
8068                 remove_cache_extent(nodes, cache);
8069                 free(cache);
8070         }
8071         cache = lookup_cache_extent(extent_cache, bytenr, size);
8072         if (cache) {
8073                 rec = container_of(cache, struct extent_record, cache);
8074                 gen = rec->parent_generation;
8075         }
8076
8077         /* fixme, get the real parent transid */
8078         buf = read_tree_block(root->fs_info, bytenr, gen);
8079         if (!extent_buffer_uptodate(buf)) {
8080                 record_bad_block_io(root->fs_info,
8081                                     extent_cache, bytenr, size);
8082                 goto out;
8083         }
8084
8085         nritems = btrfs_header_nritems(buf);
8086
8087         flags = 0;
8088         if (!init_extent_tree) {
8089                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8090                                        btrfs_header_level(buf), 1, NULL,
8091                                        &flags);
8092                 if (ret < 0) {
8093                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8094                         if (ret < 0) {
8095                                 fprintf(stderr, "Couldn't calc extent flags\n");
8096                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8097                         }
8098                 }
8099         } else {
8100                 flags = 0;
8101                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8102                 if (ret < 0) {
8103                         fprintf(stderr, "Couldn't calc extent flags\n");
8104                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8105                 }
8106         }
8107
8108         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8109                 if (ri != NULL &&
8110                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8111                     ri->objectid == btrfs_header_owner(buf)) {
8112                         /*
8113                          * Ok we got to this block from it's original owner and
8114                          * we have FULL_BACKREF set.  Relocation can leave
8115                          * converted blocks over so this is altogether possible,
8116                          * however it's not possible if the generation > the
8117                          * last snapshot, so check for this case.
8118                          */
8119                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8120                             btrfs_header_generation(buf) > ri->last_snapshot) {
8121                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8122                                 rec->bad_full_backref = 1;
8123                         }
8124                 }
8125         } else {
8126                 if (ri != NULL &&
8127                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8128                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8129                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8130                         rec->bad_full_backref = 1;
8131                 }
8132         }
8133
8134         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8135                 rec->flag_block_full_backref = 1;
8136                 parent = bytenr;
8137                 owner = 0;
8138         } else {
8139                 rec->flag_block_full_backref = 0;
8140                 parent = 0;
8141                 owner = btrfs_header_owner(buf);
8142         }
8143
8144         ret = check_block(root, extent_cache, buf, flags);
8145         if (ret)
8146                 goto out;
8147
8148         if (btrfs_is_leaf(buf)) {
8149                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8150                 for (i = 0; i < nritems; i++) {
8151                         struct btrfs_file_extent_item *fi;
8152                         btrfs_item_key_to_cpu(buf, &key, i);
8153                         /*
8154                          * Check key type against the leaf owner.
8155                          * Could filter quite a lot of early error if
8156                          * owner is correct
8157                          */
8158                         if (check_type_with_root(btrfs_header_owner(buf),
8159                                                  key.type)) {
8160                                 fprintf(stderr, "ignoring invalid key\n");
8161                                 continue;
8162                         }
8163                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8164                                 process_extent_item(root, extent_cache, buf,
8165                                                     i);
8166                                 continue;
8167                         }
8168                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8169                                 process_extent_item(root, extent_cache, buf,
8170                                                     i);
8171                                 continue;
8172                         }
8173                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8174                                 total_csum_bytes +=
8175                                         btrfs_item_size_nr(buf, i);
8176                                 continue;
8177                         }
8178                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8179                                 process_chunk_item(chunk_cache, &key, buf, i);
8180                                 continue;
8181                         }
8182                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8183                                 process_device_item(dev_cache, &key, buf, i);
8184                                 continue;
8185                         }
8186                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8187                                 process_block_group_item(block_group_cache,
8188                                         &key, buf, i);
8189                                 continue;
8190                         }
8191                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8192                                 process_device_extent_item(dev_extent_cache,
8193                                         &key, buf, i);
8194                                 continue;
8195
8196                         }
8197                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8198 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8199                                 process_extent_ref_v0(extent_cache, buf, i);
8200 #else
8201                                 BUG();
8202 #endif
8203                                 continue;
8204                         }
8205
8206                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8207                                 ret = add_tree_backref(extent_cache,
8208                                                 key.objectid, 0, key.offset, 0);
8209                                 if (ret < 0)
8210                                         error(
8211                                 "add_tree_backref failed (leaf tree block): %s",
8212                                               strerror(-ret));
8213                                 continue;
8214                         }
8215                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8216                                 ret = add_tree_backref(extent_cache,
8217                                                 key.objectid, key.offset, 0, 0);
8218                                 if (ret < 0)
8219                                         error(
8220                                 "add_tree_backref failed (leaf shared block): %s",
8221                                               strerror(-ret));
8222                                 continue;
8223                         }
8224                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8225                                 struct btrfs_extent_data_ref *ref;
8226                                 ref = btrfs_item_ptr(buf, i,
8227                                                 struct btrfs_extent_data_ref);
8228                                 add_data_backref(extent_cache,
8229                                         key.objectid, 0,
8230                                         btrfs_extent_data_ref_root(buf, ref),
8231                                         btrfs_extent_data_ref_objectid(buf,
8232                                                                        ref),
8233                                         btrfs_extent_data_ref_offset(buf, ref),
8234                                         btrfs_extent_data_ref_count(buf, ref),
8235                                         0, root->fs_info->sectorsize);
8236                                 continue;
8237                         }
8238                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8239                                 struct btrfs_shared_data_ref *ref;
8240                                 ref = btrfs_item_ptr(buf, i,
8241                                                 struct btrfs_shared_data_ref);
8242                                 add_data_backref(extent_cache,
8243                                         key.objectid, key.offset, 0, 0, 0,
8244                                         btrfs_shared_data_ref_count(buf, ref),
8245                                         0, root->fs_info->sectorsize);
8246                                 continue;
8247                         }
8248                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8249                                 struct bad_item *bad;
8250
8251                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8252                                         continue;
8253                                 if (!owner)
8254                                         continue;
8255                                 bad = malloc(sizeof(struct bad_item));
8256                                 if (!bad)
8257                                         continue;
8258                                 INIT_LIST_HEAD(&bad->list);
8259                                 memcpy(&bad->key, &key,
8260                                        sizeof(struct btrfs_key));
8261                                 bad->root_id = owner;
8262                                 list_add_tail(&bad->list, &delete_items);
8263                                 continue;
8264                         }
8265                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8266                                 continue;
8267                         fi = btrfs_item_ptr(buf, i,
8268                                             struct btrfs_file_extent_item);
8269                         if (btrfs_file_extent_type(buf, fi) ==
8270                             BTRFS_FILE_EXTENT_INLINE)
8271                                 continue;
8272                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8273                                 continue;
8274
8275                         data_bytes_allocated +=
8276                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8277                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8278                                 abort();
8279                         }
8280                         data_bytes_referenced +=
8281                                 btrfs_file_extent_num_bytes(buf, fi);
8282                         add_data_backref(extent_cache,
8283                                 btrfs_file_extent_disk_bytenr(buf, fi),
8284                                 parent, owner, key.objectid, key.offset -
8285                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8286                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8287                 }
8288         } else {
8289                 int level;
8290                 struct btrfs_key first_key;
8291
8292                 first_key.objectid = 0;
8293
8294                 if (nritems > 0)
8295                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8296                 level = btrfs_header_level(buf);
8297                 for (i = 0; i < nritems; i++) {
8298                         struct extent_record tmpl;
8299
8300                         ptr = btrfs_node_blockptr(buf, i);
8301                         size = root->fs_info->nodesize;
8302                         btrfs_node_key_to_cpu(buf, &key, i);
8303                         if (ri != NULL) {
8304                                 if ((level == ri->drop_level)
8305                                     && is_dropped_key(&key, &ri->drop_key)) {
8306                                         continue;
8307                                 }
8308                         }
8309
8310                         memset(&tmpl, 0, sizeof(tmpl));
8311                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8312                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8313                         tmpl.start = ptr;
8314                         tmpl.nr = size;
8315                         tmpl.refs = 1;
8316                         tmpl.metadata = 1;
8317                         tmpl.max_size = size;
8318                         ret = add_extent_rec(extent_cache, &tmpl);
8319                         if (ret < 0)
8320                                 goto out;
8321
8322                         ret = add_tree_backref(extent_cache, ptr, parent,
8323                                         owner, 1);
8324                         if (ret < 0) {
8325                                 error(
8326                                 "add_tree_backref failed (non-leaf block): %s",
8327                                       strerror(-ret));
8328                                 continue;
8329                         }
8330
8331                         if (level > 1) {
8332                                 add_pending(nodes, seen, ptr, size);
8333                         } else {
8334                                 add_pending(pending, seen, ptr, size);
8335                         }
8336                 }
8337                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8338                                       nritems) * sizeof(struct btrfs_key_ptr);
8339         }
8340         total_btree_bytes += buf->len;
8341         if (fs_root_objectid(btrfs_header_owner(buf)))
8342                 total_fs_tree_bytes += buf->len;
8343         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8344                 total_extent_tree_bytes += buf->len;
8345 out:
8346         free_extent_buffer(buf);
8347         return ret;
8348 }
8349
8350 static int add_root_to_pending(struct extent_buffer *buf,
8351                                struct cache_tree *extent_cache,
8352                                struct cache_tree *pending,
8353                                struct cache_tree *seen,
8354                                struct cache_tree *nodes,
8355                                u64 objectid)
8356 {
8357         struct extent_record tmpl;
8358         int ret;
8359
8360         if (btrfs_header_level(buf) > 0)
8361                 add_pending(nodes, seen, buf->start, buf->len);
8362         else
8363                 add_pending(pending, seen, buf->start, buf->len);
8364
8365         memset(&tmpl, 0, sizeof(tmpl));
8366         tmpl.start = buf->start;
8367         tmpl.nr = buf->len;
8368         tmpl.is_root = 1;
8369         tmpl.refs = 1;
8370         tmpl.metadata = 1;
8371         tmpl.max_size = buf->len;
8372         add_extent_rec(extent_cache, &tmpl);
8373
8374         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8375             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8376                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8377                                 0, 1);
8378         else
8379                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8380                                 1);
8381         return ret;
8382 }
8383
8384 /* as we fix the tree, we might be deleting blocks that
8385  * we're tracking for repair.  This hook makes sure we
8386  * remove any backrefs for blocks as we are fixing them.
8387  */
8388 static int free_extent_hook(struct btrfs_trans_handle *trans,
8389                             struct btrfs_root *root,
8390                             u64 bytenr, u64 num_bytes, u64 parent,
8391                             u64 root_objectid, u64 owner, u64 offset,
8392                             int refs_to_drop)
8393 {
8394         struct extent_record *rec;
8395         struct cache_extent *cache;
8396         int is_data;
8397         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8398
8399         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8400         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8401         if (!cache)
8402                 return 0;
8403
8404         rec = container_of(cache, struct extent_record, cache);
8405         if (is_data) {
8406                 struct data_backref *back;
8407                 back = find_data_backref(rec, parent, root_objectid, owner,
8408                                          offset, 1, bytenr, num_bytes);
8409                 if (!back)
8410                         goto out;
8411                 if (back->node.found_ref) {
8412                         back->found_ref -= refs_to_drop;
8413                         if (rec->refs)
8414                                 rec->refs -= refs_to_drop;
8415                 }
8416                 if (back->node.found_extent_tree) {
8417                         back->num_refs -= refs_to_drop;
8418                         if (rec->extent_item_refs)
8419                                 rec->extent_item_refs -= refs_to_drop;
8420                 }
8421                 if (back->found_ref == 0)
8422                         back->node.found_ref = 0;
8423                 if (back->num_refs == 0)
8424                         back->node.found_extent_tree = 0;
8425
8426                 if (!back->node.found_extent_tree && back->node.found_ref) {
8427                         rb_erase(&back->node.node, &rec->backref_tree);
8428                         free(back);
8429                 }
8430         } else {
8431                 struct tree_backref *back;
8432                 back = find_tree_backref(rec, parent, root_objectid);
8433                 if (!back)
8434                         goto out;
8435                 if (back->node.found_ref) {
8436                         if (rec->refs)
8437                                 rec->refs--;
8438                         back->node.found_ref = 0;
8439                 }
8440                 if (back->node.found_extent_tree) {
8441                         if (rec->extent_item_refs)
8442                                 rec->extent_item_refs--;
8443                         back->node.found_extent_tree = 0;
8444                 }
8445                 if (!back->node.found_extent_tree && back->node.found_ref) {
8446                         rb_erase(&back->node.node, &rec->backref_tree);
8447                         free(back);
8448                 }
8449         }
8450         maybe_free_extent_rec(extent_cache, rec);
8451 out:
8452         return 0;
8453 }
8454
8455 static int delete_extent_records(struct btrfs_trans_handle *trans,
8456                                  struct btrfs_root *root,
8457                                  struct btrfs_path *path,
8458                                  u64 bytenr)
8459 {
8460         struct btrfs_key key;
8461         struct btrfs_key found_key;
8462         struct extent_buffer *leaf;
8463         int ret;
8464         int slot;
8465
8466
8467         key.objectid = bytenr;
8468         key.type = (u8)-1;
8469         key.offset = (u64)-1;
8470
8471         while(1) {
8472                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8473                                         &key, path, 0, 1);
8474                 if (ret < 0)
8475                         break;
8476
8477                 if (ret > 0) {
8478                         ret = 0;
8479                         if (path->slots[0] == 0)
8480                                 break;
8481                         path->slots[0]--;
8482                 }
8483                 ret = 0;
8484
8485                 leaf = path->nodes[0];
8486                 slot = path->slots[0];
8487
8488                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8489                 if (found_key.objectid != bytenr)
8490                         break;
8491
8492                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8493                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8494                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8495                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8496                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8497                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8498                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8499                         btrfs_release_path(path);
8500                         if (found_key.type == 0) {
8501                                 if (found_key.offset == 0)
8502                                         break;
8503                                 key.offset = found_key.offset - 1;
8504                                 key.type = found_key.type;
8505                         }
8506                         key.type = found_key.type - 1;
8507                         key.offset = (u64)-1;
8508                         continue;
8509                 }
8510
8511                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8512                         found_key.objectid, found_key.type, found_key.offset);
8513
8514                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8515                 if (ret)
8516                         break;
8517                 btrfs_release_path(path);
8518
8519                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8520                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8521                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8522                                 found_key.offset : root->fs_info->nodesize;
8523
8524                         ret = btrfs_update_block_group(trans, root, bytenr,
8525                                                        bytes, 0, 0);
8526                         if (ret)
8527                                 break;
8528                 }
8529         }
8530
8531         btrfs_release_path(path);
8532         return ret;
8533 }
8534
8535 /*
8536  * for a single backref, this will allocate a new extent
8537  * and add the backref to it.
8538  */
8539 static int record_extent(struct btrfs_trans_handle *trans,
8540                          struct btrfs_fs_info *info,
8541                          struct btrfs_path *path,
8542                          struct extent_record *rec,
8543                          struct extent_backref *back,
8544                          int allocated, u64 flags)
8545 {
8546         int ret = 0;
8547         struct btrfs_root *extent_root = info->extent_root;
8548         struct extent_buffer *leaf;
8549         struct btrfs_key ins_key;
8550         struct btrfs_extent_item *ei;
8551         struct data_backref *dback;
8552         struct btrfs_tree_block_info *bi;
8553
8554         if (!back->is_data)
8555                 rec->max_size = max_t(u64, rec->max_size,
8556                                     info->nodesize);
8557
8558         if (!allocated) {
8559                 u32 item_size = sizeof(*ei);
8560
8561                 if (!back->is_data)
8562                         item_size += sizeof(*bi);
8563
8564                 ins_key.objectid = rec->start;
8565                 ins_key.offset = rec->max_size;
8566                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8567
8568                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8569                                         &ins_key, item_size);
8570                 if (ret)
8571                         goto fail;
8572
8573                 leaf = path->nodes[0];
8574                 ei = btrfs_item_ptr(leaf, path->slots[0],
8575                                     struct btrfs_extent_item);
8576
8577                 btrfs_set_extent_refs(leaf, ei, 0);
8578                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8579
8580                 if (back->is_data) {
8581                         btrfs_set_extent_flags(leaf, ei,
8582                                                BTRFS_EXTENT_FLAG_DATA);
8583                 } else {
8584                         struct btrfs_disk_key copy_key;;
8585
8586                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8587                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8588                                              sizeof(*bi));
8589
8590                         btrfs_set_disk_key_objectid(&copy_key,
8591                                                     rec->info_objectid);
8592                         btrfs_set_disk_key_type(&copy_key, 0);
8593                         btrfs_set_disk_key_offset(&copy_key, 0);
8594
8595                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8596                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8597
8598                         btrfs_set_extent_flags(leaf, ei,
8599                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8600                 }
8601
8602                 btrfs_mark_buffer_dirty(leaf);
8603                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8604                                                rec->max_size, 1, 0);
8605                 if (ret)
8606                         goto fail;
8607                 btrfs_release_path(path);
8608         }
8609
8610         if (back->is_data) {
8611                 u64 parent;
8612                 int i;
8613
8614                 dback = to_data_backref(back);
8615                 if (back->full_backref)
8616                         parent = dback->parent;
8617                 else
8618                         parent = 0;
8619
8620                 for (i = 0; i < dback->found_ref; i++) {
8621                         /* if parent != 0, we're doing a full backref
8622                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8623                          * just makes the backref allocator create a data
8624                          * backref
8625                          */
8626                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8627                                                    rec->start, rec->max_size,
8628                                                    parent,
8629                                                    dback->root,
8630                                                    parent ?
8631                                                    BTRFS_FIRST_FREE_OBJECTID :
8632                                                    dback->owner,
8633                                                    dback->offset);
8634                         if (ret)
8635                                 break;
8636                 }
8637                 fprintf(stderr, "adding new data backref"
8638                                 " on %llu %s %llu owner %llu"
8639                                 " offset %llu found %d\n",
8640                                 (unsigned long long)rec->start,
8641                                 back->full_backref ?
8642                                 "parent" : "root",
8643                                 back->full_backref ?
8644                                 (unsigned long long)parent :
8645                                 (unsigned long long)dback->root,
8646                                 (unsigned long long)dback->owner,
8647                                 (unsigned long long)dback->offset,
8648                                 dback->found_ref);
8649         } else {
8650                 u64 parent;
8651                 struct tree_backref *tback;
8652
8653                 tback = to_tree_backref(back);
8654                 if (back->full_backref)
8655                         parent = tback->parent;
8656                 else
8657                         parent = 0;
8658
8659                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8660                                            rec->start, rec->max_size,
8661                                            parent, tback->root, 0, 0);
8662                 fprintf(stderr, "adding new tree backref on "
8663                         "start %llu len %llu parent %llu root %llu\n",
8664                         rec->start, rec->max_size, parent, tback->root);
8665         }
8666 fail:
8667         btrfs_release_path(path);
8668         return ret;
8669 }
8670
8671 static struct extent_entry *find_entry(struct list_head *entries,
8672                                        u64 bytenr, u64 bytes)
8673 {
8674         struct extent_entry *entry = NULL;
8675
8676         list_for_each_entry(entry, entries, list) {
8677                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8678                         return entry;
8679         }
8680
8681         return NULL;
8682 }
8683
8684 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8685 {
8686         struct extent_entry *entry, *best = NULL, *prev = NULL;
8687
8688         list_for_each_entry(entry, entries, list) {
8689                 /*
8690                  * If there are as many broken entries as entries then we know
8691                  * not to trust this particular entry.
8692                  */
8693                 if (entry->broken == entry->count)
8694                         continue;
8695
8696                 /*
8697                  * Special case, when there are only two entries and 'best' is
8698                  * the first one
8699                  */
8700                 if (!prev) {
8701                         best = entry;
8702                         prev = entry;
8703                         continue;
8704                 }
8705
8706                 /*
8707                  * If our current entry == best then we can't be sure our best
8708                  * is really the best, so we need to keep searching.
8709                  */
8710                 if (best && best->count == entry->count) {
8711                         prev = entry;
8712                         best = NULL;
8713                         continue;
8714                 }
8715
8716                 /* Prev == entry, not good enough, have to keep searching */
8717                 if (!prev->broken && prev->count == entry->count)
8718                         continue;
8719
8720                 if (!best)
8721                         best = (prev->count > entry->count) ? prev : entry;
8722                 else if (best->count < entry->count)
8723                         best = entry;
8724                 prev = entry;
8725         }
8726
8727         return best;
8728 }
8729
8730 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8731                       struct data_backref *dback, struct extent_entry *entry)
8732 {
8733         struct btrfs_trans_handle *trans;
8734         struct btrfs_root *root;
8735         struct btrfs_file_extent_item *fi;
8736         struct extent_buffer *leaf;
8737         struct btrfs_key key;
8738         u64 bytenr, bytes;
8739         int ret, err;
8740
8741         key.objectid = dback->root;
8742         key.type = BTRFS_ROOT_ITEM_KEY;
8743         key.offset = (u64)-1;
8744         root = btrfs_read_fs_root(info, &key);
8745         if (IS_ERR(root)) {
8746                 fprintf(stderr, "Couldn't find root for our ref\n");
8747                 return -EINVAL;
8748         }
8749
8750         /*
8751          * The backref points to the original offset of the extent if it was
8752          * split, so we need to search down to the offset we have and then walk
8753          * forward until we find the backref we're looking for.
8754          */
8755         key.objectid = dback->owner;
8756         key.type = BTRFS_EXTENT_DATA_KEY;
8757         key.offset = dback->offset;
8758         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8759         if (ret < 0) {
8760                 fprintf(stderr, "Error looking up ref %d\n", ret);
8761                 return ret;
8762         }
8763
8764         while (1) {
8765                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8766                         ret = btrfs_next_leaf(root, path);
8767                         if (ret) {
8768                                 fprintf(stderr, "Couldn't find our ref, next\n");
8769                                 return -EINVAL;
8770                         }
8771                 }
8772                 leaf = path->nodes[0];
8773                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8774                 if (key.objectid != dback->owner ||
8775                     key.type != BTRFS_EXTENT_DATA_KEY) {
8776                         fprintf(stderr, "Couldn't find our ref, search\n");
8777                         return -EINVAL;
8778                 }
8779                 fi = btrfs_item_ptr(leaf, path->slots[0],
8780                                     struct btrfs_file_extent_item);
8781                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8782                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8783
8784                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8785                         break;
8786                 path->slots[0]++;
8787         }
8788
8789         btrfs_release_path(path);
8790
8791         trans = btrfs_start_transaction(root, 1);
8792         if (IS_ERR(trans))
8793                 return PTR_ERR(trans);
8794
8795         /*
8796          * Ok we have the key of the file extent we want to fix, now we can cow
8797          * down to the thing and fix it.
8798          */
8799         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8800         if (ret < 0) {
8801                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8802                         key.objectid, key.type, key.offset, ret);
8803                 goto out;
8804         }
8805         if (ret > 0) {
8806                 fprintf(stderr, "Well that's odd, we just found this key "
8807                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8808                         key.offset);
8809                 ret = -EINVAL;
8810                 goto out;
8811         }
8812         leaf = path->nodes[0];
8813         fi = btrfs_item_ptr(leaf, path->slots[0],
8814                             struct btrfs_file_extent_item);
8815
8816         if (btrfs_file_extent_compression(leaf, fi) &&
8817             dback->disk_bytenr != entry->bytenr) {
8818                 fprintf(stderr, "Ref doesn't match the record start and is "
8819                         "compressed, please take a btrfs-image of this file "
8820                         "system and send it to a btrfs developer so they can "
8821                         "complete this functionality for bytenr %Lu\n",
8822                         dback->disk_bytenr);
8823                 ret = -EINVAL;
8824                 goto out;
8825         }
8826
8827         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8828                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8829         } else if (dback->disk_bytenr > entry->bytenr) {
8830                 u64 off_diff, offset;
8831
8832                 off_diff = dback->disk_bytenr - entry->bytenr;
8833                 offset = btrfs_file_extent_offset(leaf, fi);
8834                 if (dback->disk_bytenr + offset +
8835                     btrfs_file_extent_num_bytes(leaf, fi) >
8836                     entry->bytenr + entry->bytes) {
8837                         fprintf(stderr, "Ref is past the entry end, please "
8838                                 "take a btrfs-image of this file system and "
8839                                 "send it to a btrfs developer, ref %Lu\n",
8840                                 dback->disk_bytenr);
8841                         ret = -EINVAL;
8842                         goto out;
8843                 }
8844                 offset += off_diff;
8845                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8846                 btrfs_set_file_extent_offset(leaf, fi, offset);
8847         } else if (dback->disk_bytenr < entry->bytenr) {
8848                 u64 offset;
8849
8850                 offset = btrfs_file_extent_offset(leaf, fi);
8851                 if (dback->disk_bytenr + offset < entry->bytenr) {
8852                         fprintf(stderr, "Ref is before the entry start, please"
8853                                 " take a btrfs-image of this file system and "
8854                                 "send it to a btrfs developer, ref %Lu\n",
8855                                 dback->disk_bytenr);
8856                         ret = -EINVAL;
8857                         goto out;
8858                 }
8859
8860                 offset += dback->disk_bytenr;
8861                 offset -= entry->bytenr;
8862                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8863                 btrfs_set_file_extent_offset(leaf, fi, offset);
8864         }
8865
8866         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8867
8868         /*
8869          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8870          * only do this if we aren't using compression, otherwise it's a
8871          * trickier case.
8872          */
8873         if (!btrfs_file_extent_compression(leaf, fi))
8874                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8875         else
8876                 printf("ram bytes may be wrong?\n");
8877         btrfs_mark_buffer_dirty(leaf);
8878 out:
8879         err = btrfs_commit_transaction(trans, root);
8880         btrfs_release_path(path);
8881         return ret ? ret : err;
8882 }
8883
8884 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8885                            struct extent_record *rec)
8886 {
8887         struct extent_backref *back, *tmp;
8888         struct data_backref *dback;
8889         struct extent_entry *entry, *best = NULL;
8890         LIST_HEAD(entries);
8891         int nr_entries = 0;
8892         int broken_entries = 0;
8893         int ret = 0;
8894         short mismatch = 0;
8895
8896         /*
8897          * Metadata is easy and the backrefs should always agree on bytenr and
8898          * size, if not we've got bigger issues.
8899          */
8900         if (rec->metadata)
8901                 return 0;
8902
8903         rbtree_postorder_for_each_entry_safe(back, tmp,
8904                                              &rec->backref_tree, node) {
8905                 if (back->full_backref || !back->is_data)
8906                         continue;
8907
8908                 dback = to_data_backref(back);
8909
8910                 /*
8911                  * We only pay attention to backrefs that we found a real
8912                  * backref for.
8913                  */
8914                 if (dback->found_ref == 0)
8915                         continue;
8916
8917                 /*
8918                  * For now we only catch when the bytes don't match, not the
8919                  * bytenr.  We can easily do this at the same time, but I want
8920                  * to have a fs image to test on before we just add repair
8921                  * functionality willy-nilly so we know we won't screw up the
8922                  * repair.
8923                  */
8924
8925                 entry = find_entry(&entries, dback->disk_bytenr,
8926                                    dback->bytes);
8927                 if (!entry) {
8928                         entry = malloc(sizeof(struct extent_entry));
8929                         if (!entry) {
8930                                 ret = -ENOMEM;
8931                                 goto out;
8932                         }
8933                         memset(entry, 0, sizeof(*entry));
8934                         entry->bytenr = dback->disk_bytenr;
8935                         entry->bytes = dback->bytes;
8936                         list_add_tail(&entry->list, &entries);
8937                         nr_entries++;
8938                 }
8939
8940                 /*
8941                  * If we only have on entry we may think the entries agree when
8942                  * in reality they don't so we have to do some extra checking.
8943                  */
8944                 if (dback->disk_bytenr != rec->start ||
8945                     dback->bytes != rec->nr || back->broken)
8946                         mismatch = 1;
8947
8948                 if (back->broken) {
8949                         entry->broken++;
8950                         broken_entries++;
8951                 }
8952
8953                 entry->count++;
8954         }
8955
8956         /* Yay all the backrefs agree, carry on good sir */
8957         if (nr_entries <= 1 && !mismatch)
8958                 goto out;
8959
8960         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8961                 "%Lu\n", rec->start);
8962
8963         /*
8964          * First we want to see if the backrefs can agree amongst themselves who
8965          * is right, so figure out which one of the entries has the highest
8966          * count.
8967          */
8968         best = find_most_right_entry(&entries);
8969
8970         /*
8971          * Ok so we may have an even split between what the backrefs think, so
8972          * this is where we use the extent ref to see what it thinks.
8973          */
8974         if (!best) {
8975                 entry = find_entry(&entries, rec->start, rec->nr);
8976                 if (!entry && (!broken_entries || !rec->found_rec)) {
8977                         fprintf(stderr, "Backrefs don't agree with each other "
8978                                 "and extent record doesn't agree with anybody,"
8979                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8980                                 rec->start, rec->nr);
8981                         ret = -EINVAL;
8982                         goto out;
8983                 } else if (!entry) {
8984                         /*
8985                          * Ok our backrefs were broken, we'll assume this is the
8986                          * correct value and add an entry for this range.
8987                          */
8988                         entry = malloc(sizeof(struct extent_entry));
8989                         if (!entry) {
8990                                 ret = -ENOMEM;
8991                                 goto out;
8992                         }
8993                         memset(entry, 0, sizeof(*entry));
8994                         entry->bytenr = rec->start;
8995                         entry->bytes = rec->nr;
8996                         list_add_tail(&entry->list, &entries);
8997                         nr_entries++;
8998                 }
8999                 entry->count++;
9000                 best = find_most_right_entry(&entries);
9001                 if (!best) {
9002                         fprintf(stderr, "Backrefs and extent record evenly "
9003                                 "split on who is right, this is going to "
9004                                 "require user input to fix bytenr %Lu bytes "
9005                                 "%Lu\n", rec->start, rec->nr);
9006                         ret = -EINVAL;
9007                         goto out;
9008                 }
9009         }
9010
9011         /*
9012          * I don't think this can happen currently as we'll abort() if we catch
9013          * this case higher up, but in case somebody removes that we still can't
9014          * deal with it properly here yet, so just bail out of that's the case.
9015          */
9016         if (best->bytenr != rec->start) {
9017                 fprintf(stderr, "Extent start and backref starts don't match, "
9018                         "please use btrfs-image on this file system and send "
9019                         "it to a btrfs developer so they can make fsck fix "
9020                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9021                         rec->start, rec->nr);
9022                 ret = -EINVAL;
9023                 goto out;
9024         }
9025
9026         /*
9027          * Ok great we all agreed on an extent record, let's go find the real
9028          * references and fix up the ones that don't match.
9029          */
9030         rbtree_postorder_for_each_entry_safe(back, tmp,
9031                                              &rec->backref_tree, node) {
9032                 if (back->full_backref || !back->is_data)
9033                         continue;
9034
9035                 dback = to_data_backref(back);
9036
9037                 /*
9038                  * Still ignoring backrefs that don't have a real ref attached
9039                  * to them.
9040                  */
9041                 if (dback->found_ref == 0)
9042                         continue;
9043
9044                 if (dback->bytes == best->bytes &&
9045                     dback->disk_bytenr == best->bytenr)
9046                         continue;
9047
9048                 ret = repair_ref(info, path, dback, best);
9049                 if (ret)
9050                         goto out;
9051         }
9052
9053         /*
9054          * Ok we messed with the actual refs, which means we need to drop our
9055          * entire cache and go back and rescan.  I know this is a huge pain and
9056          * adds a lot of extra work, but it's the only way to be safe.  Once all
9057          * the backrefs agree we may not need to do anything to the extent
9058          * record itself.
9059          */
9060         ret = -EAGAIN;
9061 out:
9062         while (!list_empty(&entries)) {
9063                 entry = list_entry(entries.next, struct extent_entry, list);
9064                 list_del_init(&entry->list);
9065                 free(entry);
9066         }
9067         return ret;
9068 }
9069
9070 static int process_duplicates(struct cache_tree *extent_cache,
9071                               struct extent_record *rec)
9072 {
9073         struct extent_record *good, *tmp;
9074         struct cache_extent *cache;
9075         int ret;
9076
9077         /*
9078          * If we found a extent record for this extent then return, or if we
9079          * have more than one duplicate we are likely going to need to delete
9080          * something.
9081          */
9082         if (rec->found_rec || rec->num_duplicates > 1)
9083                 return 0;
9084
9085         /* Shouldn't happen but just in case */
9086         BUG_ON(!rec->num_duplicates);
9087
9088         /*
9089          * So this happens if we end up with a backref that doesn't match the
9090          * actual extent entry.  So either the backref is bad or the extent
9091          * entry is bad.  Either way we want to have the extent_record actually
9092          * reflect what we found in the extent_tree, so we need to take the
9093          * duplicate out and use that as the extent_record since the only way we
9094          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9095          */
9096         remove_cache_extent(extent_cache, &rec->cache);
9097
9098         good = to_extent_record(rec->dups.next);
9099         list_del_init(&good->list);
9100         INIT_LIST_HEAD(&good->backrefs);
9101         INIT_LIST_HEAD(&good->dups);
9102         good->cache.start = good->start;
9103         good->cache.size = good->nr;
9104         good->content_checked = 0;
9105         good->owner_ref_checked = 0;
9106         good->num_duplicates = 0;
9107         good->refs = rec->refs;
9108         list_splice_init(&rec->backrefs, &good->backrefs);
9109         while (1) {
9110                 cache = lookup_cache_extent(extent_cache, good->start,
9111                                             good->nr);
9112                 if (!cache)
9113                         break;
9114                 tmp = container_of(cache, struct extent_record, cache);
9115
9116                 /*
9117                  * If we find another overlapping extent and it's found_rec is
9118                  * set then it's a duplicate and we need to try and delete
9119                  * something.
9120                  */
9121                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9122                         if (list_empty(&good->list))
9123                                 list_add_tail(&good->list,
9124                                               &duplicate_extents);
9125                         good->num_duplicates += tmp->num_duplicates + 1;
9126                         list_splice_init(&tmp->dups, &good->dups);
9127                         list_del_init(&tmp->list);
9128                         list_add_tail(&tmp->list, &good->dups);
9129                         remove_cache_extent(extent_cache, &tmp->cache);
9130                         continue;
9131                 }
9132
9133                 /*
9134                  * Ok we have another non extent item backed extent rec, so lets
9135                  * just add it to this extent and carry on like we did above.
9136                  */
9137                 good->refs += tmp->refs;
9138                 list_splice_init(&tmp->backrefs, &good->backrefs);
9139                 remove_cache_extent(extent_cache, &tmp->cache);
9140                 free(tmp);
9141         }
9142         ret = insert_cache_extent(extent_cache, &good->cache);
9143         BUG_ON(ret);
9144         free(rec);
9145         return good->num_duplicates ? 0 : 1;
9146 }
9147
9148 static int delete_duplicate_records(struct btrfs_root *root,
9149                                     struct extent_record *rec)
9150 {
9151         struct btrfs_trans_handle *trans;
9152         LIST_HEAD(delete_list);
9153         struct btrfs_path path;
9154         struct extent_record *tmp, *good, *n;
9155         int nr_del = 0;
9156         int ret = 0, err;
9157         struct btrfs_key key;
9158
9159         btrfs_init_path(&path);
9160
9161         good = rec;
9162         /* Find the record that covers all of the duplicates. */
9163         list_for_each_entry(tmp, &rec->dups, list) {
9164                 if (good->start < tmp->start)
9165                         continue;
9166                 if (good->nr > tmp->nr)
9167                         continue;
9168
9169                 if (tmp->start + tmp->nr < good->start + good->nr) {
9170                         fprintf(stderr, "Ok we have overlapping extents that "
9171                                 "aren't completely covered by each other, this "
9172                                 "is going to require more careful thought.  "
9173                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9174                                 tmp->start, tmp->nr, good->start, good->nr);
9175                         abort();
9176                 }
9177                 good = tmp;
9178         }
9179
9180         if (good != rec)
9181                 list_add_tail(&rec->list, &delete_list);
9182
9183         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9184                 if (tmp == good)
9185                         continue;
9186                 list_move_tail(&tmp->list, &delete_list);
9187         }
9188
9189         root = root->fs_info->extent_root;
9190         trans = btrfs_start_transaction(root, 1);
9191         if (IS_ERR(trans)) {
9192                 ret = PTR_ERR(trans);
9193                 goto out;
9194         }
9195
9196         list_for_each_entry(tmp, &delete_list, list) {
9197                 if (tmp->found_rec == 0)
9198                         continue;
9199                 key.objectid = tmp->start;
9200                 key.type = BTRFS_EXTENT_ITEM_KEY;
9201                 key.offset = tmp->nr;
9202
9203                 /* Shouldn't happen but just in case */
9204                 if (tmp->metadata) {
9205                         fprintf(stderr, "Well this shouldn't happen, extent "
9206                                 "record overlaps but is metadata? "
9207                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9208                         abort();
9209                 }
9210
9211                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9212                 if (ret) {
9213                         if (ret > 0)
9214                                 ret = -EINVAL;
9215                         break;
9216                 }
9217                 ret = btrfs_del_item(trans, root, &path);
9218                 if (ret)
9219                         break;
9220                 btrfs_release_path(&path);
9221                 nr_del++;
9222         }
9223         err = btrfs_commit_transaction(trans, root);
9224         if (err && !ret)
9225                 ret = err;
9226 out:
9227         while (!list_empty(&delete_list)) {
9228                 tmp = to_extent_record(delete_list.next);
9229                 list_del_init(&tmp->list);
9230                 if (tmp == rec)
9231                         continue;
9232                 free(tmp);
9233         }
9234
9235         while (!list_empty(&rec->dups)) {
9236                 tmp = to_extent_record(rec->dups.next);
9237                 list_del_init(&tmp->list);
9238                 free(tmp);
9239         }
9240
9241         btrfs_release_path(&path);
9242
9243         if (!ret && !nr_del)
9244                 rec->num_duplicates = 0;
9245
9246         return ret ? ret : nr_del;
9247 }
9248
9249 static int find_possible_backrefs(struct btrfs_fs_info *info,
9250                                   struct btrfs_path *path,
9251                                   struct cache_tree *extent_cache,
9252                                   struct extent_record *rec)
9253 {
9254         struct btrfs_root *root;
9255         struct extent_backref *back, *tmp;
9256         struct data_backref *dback;
9257         struct cache_extent *cache;
9258         struct btrfs_file_extent_item *fi;
9259         struct btrfs_key key;
9260         u64 bytenr, bytes;
9261         int ret;
9262
9263         rbtree_postorder_for_each_entry_safe(back, tmp,
9264                                              &rec->backref_tree, node) {
9265                 /* Don't care about full backrefs (poor unloved backrefs) */
9266                 if (back->full_backref || !back->is_data)
9267                         continue;
9268
9269                 dback = to_data_backref(back);
9270
9271                 /* We found this one, we don't need to do a lookup */
9272                 if (dback->found_ref)
9273                         continue;
9274
9275                 key.objectid = dback->root;
9276                 key.type = BTRFS_ROOT_ITEM_KEY;
9277                 key.offset = (u64)-1;
9278
9279                 root = btrfs_read_fs_root(info, &key);
9280
9281                 /* No root, definitely a bad ref, skip */
9282                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9283                         continue;
9284                 /* Other err, exit */
9285                 if (IS_ERR(root))
9286                         return PTR_ERR(root);
9287
9288                 key.objectid = dback->owner;
9289                 key.type = BTRFS_EXTENT_DATA_KEY;
9290                 key.offset = dback->offset;
9291                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9292                 if (ret) {
9293                         btrfs_release_path(path);
9294                         if (ret < 0)
9295                                 return ret;
9296                         /* Didn't find it, we can carry on */
9297                         ret = 0;
9298                         continue;
9299                 }
9300
9301                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9302                                     struct btrfs_file_extent_item);
9303                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9304                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9305                 btrfs_release_path(path);
9306                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9307                 if (cache) {
9308                         struct extent_record *tmp;
9309                         tmp = container_of(cache, struct extent_record, cache);
9310
9311                         /*
9312                          * If we found an extent record for the bytenr for this
9313                          * particular backref then we can't add it to our
9314                          * current extent record.  We only want to add backrefs
9315                          * that don't have a corresponding extent item in the
9316                          * extent tree since they likely belong to this record
9317                          * and we need to fix it if it doesn't match bytenrs.
9318                          */
9319                         if  (tmp->found_rec)
9320                                 continue;
9321                 }
9322
9323                 dback->found_ref += 1;
9324                 dback->disk_bytenr = bytenr;
9325                 dback->bytes = bytes;
9326
9327                 /*
9328                  * Set this so the verify backref code knows not to trust the
9329                  * values in this backref.
9330                  */
9331                 back->broken = 1;
9332         }
9333
9334         return 0;
9335 }
9336
9337 /*
9338  * Record orphan data ref into corresponding root.
9339  *
9340  * Return 0 if the extent item contains data ref and recorded.
9341  * Return 1 if the extent item contains no useful data ref
9342  *   On that case, it may contains only shared_dataref or metadata backref
9343  *   or the file extent exists(this should be handled by the extent bytenr
9344  *   recovery routine)
9345  * Return <0 if something goes wrong.
9346  */
9347 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9348                                       struct extent_record *rec)
9349 {
9350         struct btrfs_key key;
9351         struct btrfs_root *dest_root;
9352         struct extent_backref *back, *tmp;
9353         struct data_backref *dback;
9354         struct orphan_data_extent *orphan;
9355         struct btrfs_path path;
9356         int recorded_data_ref = 0;
9357         int ret = 0;
9358
9359         if (rec->metadata)
9360                 return 1;
9361         btrfs_init_path(&path);
9362         rbtree_postorder_for_each_entry_safe(back, tmp,
9363                                              &rec->backref_tree, node) {
9364                 if (back->full_backref || !back->is_data ||
9365                     !back->found_extent_tree)
9366                         continue;
9367                 dback = to_data_backref(back);
9368                 if (dback->found_ref)
9369                         continue;
9370                 key.objectid = dback->root;
9371                 key.type = BTRFS_ROOT_ITEM_KEY;
9372                 key.offset = (u64)-1;
9373
9374                 dest_root = btrfs_read_fs_root(fs_info, &key);
9375
9376                 /* For non-exist root we just skip it */
9377                 if (IS_ERR(dest_root) || !dest_root)
9378                         continue;
9379
9380                 key.objectid = dback->owner;
9381                 key.type = BTRFS_EXTENT_DATA_KEY;
9382                 key.offset = dback->offset;
9383
9384                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9385                 btrfs_release_path(&path);
9386                 /*
9387                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9388                  * we need to record it for inode/file extent rebuild.
9389                  * For ret > 0, we record it only for file extent rebuild.
9390                  * For ret == 0, the file extent exists but only bytenr
9391                  * mismatch, let the original bytenr fix routine to handle,
9392                  * don't record it.
9393                  */
9394                 if (ret == 0)
9395                         continue;
9396                 ret = 0;
9397                 orphan = malloc(sizeof(*orphan));
9398                 if (!orphan) {
9399                         ret = -ENOMEM;
9400                         goto out;
9401                 }
9402                 INIT_LIST_HEAD(&orphan->list);
9403                 orphan->root = dback->root;
9404                 orphan->objectid = dback->owner;
9405                 orphan->offset = dback->offset;
9406                 orphan->disk_bytenr = rec->cache.start;
9407                 orphan->disk_len = rec->cache.size;
9408                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9409                 recorded_data_ref = 1;
9410         }
9411 out:
9412         btrfs_release_path(&path);
9413         if (!ret)
9414                 return !recorded_data_ref;
9415         else
9416                 return ret;
9417 }
9418
9419 /*
9420  * when an incorrect extent item is found, this will delete
9421  * all of the existing entries for it and recreate them
9422  * based on what the tree scan found.
9423  */
9424 static int fixup_extent_refs(struct btrfs_fs_info *info,
9425                              struct cache_tree *extent_cache,
9426                              struct extent_record *rec)
9427 {
9428         struct btrfs_trans_handle *trans = NULL;
9429         int ret;
9430         struct btrfs_path path;
9431         struct cache_extent *cache;
9432         struct extent_backref *back, *tmp;
9433         int allocated = 0;
9434         u64 flags = 0;
9435
9436         if (rec->flag_block_full_backref)
9437                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9438
9439         btrfs_init_path(&path);
9440         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9441                 /*
9442                  * Sometimes the backrefs themselves are so broken they don't
9443                  * get attached to any meaningful rec, so first go back and
9444                  * check any of our backrefs that we couldn't find and throw
9445                  * them into the list if we find the backref so that
9446                  * verify_backrefs can figure out what to do.
9447                  */
9448                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9449                 if (ret < 0)
9450                         goto out;
9451         }
9452
9453         /* step one, make sure all of the backrefs agree */
9454         ret = verify_backrefs(info, &path, rec);
9455         if (ret < 0)
9456                 goto out;
9457
9458         trans = btrfs_start_transaction(info->extent_root, 1);
9459         if (IS_ERR(trans)) {
9460                 ret = PTR_ERR(trans);
9461                 goto out;
9462         }
9463
9464         /* step two, delete all the existing records */
9465         ret = delete_extent_records(trans, info->extent_root, &path,
9466                                     rec->start);
9467
9468         if (ret < 0)
9469                 goto out;
9470
9471         /* was this block corrupt?  If so, don't add references to it */
9472         cache = lookup_cache_extent(info->corrupt_blocks,
9473                                     rec->start, rec->max_size);
9474         if (cache) {
9475                 ret = 0;
9476                 goto out;
9477         }
9478
9479         /* step three, recreate all the refs we did find */
9480         rbtree_postorder_for_each_entry_safe(back, tmp,
9481                                              &rec->backref_tree, node) {
9482                 /*
9483                  * if we didn't find any references, don't create a
9484                  * new extent record
9485                  */
9486                 if (!back->found_ref)
9487                         continue;
9488
9489                 rec->bad_full_backref = 0;
9490                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9491                 allocated = 1;
9492
9493                 if (ret)
9494                         goto out;
9495         }
9496 out:
9497         if (trans) {
9498                 int err = btrfs_commit_transaction(trans, info->extent_root);
9499                 if (!ret)
9500                         ret = err;
9501         }
9502
9503         if (!ret)
9504                 fprintf(stderr, "Repaired extent references for %llu\n",
9505                                 (unsigned long long)rec->start);
9506
9507         btrfs_release_path(&path);
9508         return ret;
9509 }
9510
9511 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9512                               struct extent_record *rec)
9513 {
9514         struct btrfs_trans_handle *trans;
9515         struct btrfs_root *root = fs_info->extent_root;
9516         struct btrfs_path path;
9517         struct btrfs_extent_item *ei;
9518         struct btrfs_key key;
9519         u64 flags;
9520         int ret = 0;
9521
9522         key.objectid = rec->start;
9523         if (rec->metadata) {
9524                 key.type = BTRFS_METADATA_ITEM_KEY;
9525                 key.offset = rec->info_level;
9526         } else {
9527                 key.type = BTRFS_EXTENT_ITEM_KEY;
9528                 key.offset = rec->max_size;
9529         }
9530
9531         trans = btrfs_start_transaction(root, 0);
9532         if (IS_ERR(trans))
9533                 return PTR_ERR(trans);
9534
9535         btrfs_init_path(&path);
9536         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9537         if (ret < 0) {
9538                 btrfs_release_path(&path);
9539                 btrfs_commit_transaction(trans, root);
9540                 return ret;
9541         } else if (ret) {
9542                 fprintf(stderr, "Didn't find extent for %llu\n",
9543                         (unsigned long long)rec->start);
9544                 btrfs_release_path(&path);
9545                 btrfs_commit_transaction(trans, root);
9546                 return -ENOENT;
9547         }
9548
9549         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9550                             struct btrfs_extent_item);
9551         flags = btrfs_extent_flags(path.nodes[0], ei);
9552         if (rec->flag_block_full_backref) {
9553                 fprintf(stderr, "setting full backref on %llu\n",
9554                         (unsigned long long)key.objectid);
9555                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9556         } else {
9557                 fprintf(stderr, "clearing full backref on %llu\n",
9558                         (unsigned long long)key.objectid);
9559                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9560         }
9561         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9562         btrfs_mark_buffer_dirty(path.nodes[0]);
9563         btrfs_release_path(&path);
9564         ret = btrfs_commit_transaction(trans, root);
9565         if (!ret)
9566                 fprintf(stderr, "Repaired extent flags for %llu\n",
9567                                 (unsigned long long)rec->start);
9568
9569         return ret;
9570 }
9571
9572 /* right now we only prune from the extent allocation tree */
9573 static int prune_one_block(struct btrfs_trans_handle *trans,
9574                            struct btrfs_fs_info *info,
9575                            struct btrfs_corrupt_block *corrupt)
9576 {
9577         int ret;
9578         struct btrfs_path path;
9579         struct extent_buffer *eb;
9580         u64 found;
9581         int slot;
9582         int nritems;
9583         int level = corrupt->level + 1;
9584
9585         btrfs_init_path(&path);
9586 again:
9587         /* we want to stop at the parent to our busted block */
9588         path.lowest_level = level;
9589
9590         ret = btrfs_search_slot(trans, info->extent_root,
9591                                 &corrupt->key, &path, -1, 1);
9592
9593         if (ret < 0)
9594                 goto out;
9595
9596         eb = path.nodes[level];
9597         if (!eb) {
9598                 ret = -ENOENT;
9599                 goto out;
9600         }
9601
9602         /*
9603          * hopefully the search gave us the block we want to prune,
9604          * lets try that first
9605          */
9606         slot = path.slots[level];
9607         found =  btrfs_node_blockptr(eb, slot);
9608         if (found == corrupt->cache.start)
9609                 goto del_ptr;
9610
9611         nritems = btrfs_header_nritems(eb);
9612
9613         /* the search failed, lets scan this node and hope we find it */
9614         for (slot = 0; slot < nritems; slot++) {
9615                 found =  btrfs_node_blockptr(eb, slot);
9616                 if (found == corrupt->cache.start)
9617                         goto del_ptr;
9618         }
9619         /*
9620          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9621          * to this block
9622          */
9623         if (eb == info->extent_root->node) {
9624                 ret = -ENOENT;
9625                 goto out;
9626         } else {
9627                 level++;
9628                 btrfs_release_path(&path);
9629                 goto again;
9630         }
9631
9632 del_ptr:
9633         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9634         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9635
9636 out:
9637         btrfs_release_path(&path);
9638         return ret;
9639 }
9640
9641 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9642 {
9643         struct btrfs_trans_handle *trans = NULL;
9644         struct cache_extent *cache;
9645         struct btrfs_corrupt_block *corrupt;
9646
9647         while (1) {
9648                 cache = search_cache_extent(info->corrupt_blocks, 0);
9649                 if (!cache)
9650                         break;
9651                 if (!trans) {
9652                         trans = btrfs_start_transaction(info->extent_root, 1);
9653                         if (IS_ERR(trans))
9654                                 return PTR_ERR(trans);
9655                 }
9656                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9657                 prune_one_block(trans, info, corrupt);
9658                 remove_cache_extent(info->corrupt_blocks, cache);
9659         }
9660         if (trans)
9661                 return btrfs_commit_transaction(trans, info->extent_root);
9662         return 0;
9663 }
9664
9665 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9666 {
9667         struct btrfs_block_group_cache *cache;
9668         u64 start, end;
9669         int ret;
9670
9671         while (1) {
9672                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9673                                             &start, &end, EXTENT_DIRTY);
9674                 if (ret)
9675                         break;
9676                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9677         }
9678
9679         start = 0;
9680         while (1) {
9681                 cache = btrfs_lookup_first_block_group(fs_info, start);
9682                 if (!cache)
9683                         break;
9684                 if (cache->cached)
9685                         cache->cached = 0;
9686                 start = cache->key.objectid + cache->key.offset;
9687         }
9688 }
9689
9690 static int check_extent_refs(struct btrfs_root *root,
9691                              struct cache_tree *extent_cache)
9692 {
9693         struct extent_record *rec;
9694         struct cache_extent *cache;
9695         int ret = 0;
9696         int had_dups = 0;
9697
9698         if (repair) {
9699                 /*
9700                  * if we're doing a repair, we have to make sure
9701                  * we don't allocate from the problem extents.
9702                  * In the worst case, this will be all the
9703                  * extents in the FS
9704                  */
9705                 cache = search_cache_extent(extent_cache, 0);
9706                 while(cache) {
9707                         rec = container_of(cache, struct extent_record, cache);
9708                         set_extent_dirty(root->fs_info->excluded_extents,
9709                                          rec->start,
9710                                          rec->start + rec->max_size - 1);
9711                         cache = next_cache_extent(cache);
9712                 }
9713
9714                 /* pin down all the corrupted blocks too */
9715                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9716                 while(cache) {
9717                         set_extent_dirty(root->fs_info->excluded_extents,
9718                                          cache->start,
9719                                          cache->start + cache->size - 1);
9720                         cache = next_cache_extent(cache);
9721                 }
9722                 prune_corrupt_blocks(root->fs_info);
9723                 reset_cached_block_groups(root->fs_info);
9724         }
9725
9726         reset_cached_block_groups(root->fs_info);
9727
9728         /*
9729          * We need to delete any duplicate entries we find first otherwise we
9730          * could mess up the extent tree when we have backrefs that actually
9731          * belong to a different extent item and not the weird duplicate one.
9732          */
9733         while (repair && !list_empty(&duplicate_extents)) {
9734                 rec = to_extent_record(duplicate_extents.next);
9735                 list_del_init(&rec->list);
9736
9737                 /* Sometimes we can find a backref before we find an actual
9738                  * extent, so we need to process it a little bit to see if there
9739                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9740                  * if this is a backref screwup.  If we need to delete stuff
9741                  * process_duplicates() will return 0, otherwise it will return
9742                  * 1 and we
9743                  */
9744                 if (process_duplicates(extent_cache, rec))
9745                         continue;
9746                 ret = delete_duplicate_records(root, rec);
9747                 if (ret < 0)
9748                         return ret;
9749                 /*
9750                  * delete_duplicate_records will return the number of entries
9751                  * deleted, so if it's greater than 0 then we know we actually
9752                  * did something and we need to remove.
9753                  */
9754                 if (ret)
9755                         had_dups = 1;
9756         }
9757
9758         if (had_dups)
9759                 return -EAGAIN;
9760
9761         while(1) {
9762                 int cur_err = 0;
9763                 int fix = 0;
9764
9765                 cache = search_cache_extent(extent_cache, 0);
9766                 if (!cache)
9767                         break;
9768                 rec = container_of(cache, struct extent_record, cache);
9769                 if (rec->num_duplicates) {
9770                         fprintf(stderr, "extent item %llu has multiple extent "
9771                                 "items\n", (unsigned long long)rec->start);
9772                         cur_err = 1;
9773                 }
9774
9775                 if (rec->refs != rec->extent_item_refs) {
9776                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9777                                 (unsigned long long)rec->start,
9778                                 (unsigned long long)rec->nr);
9779                         fprintf(stderr, "extent item %llu, found %llu\n",
9780                                 (unsigned long long)rec->extent_item_refs,
9781                                 (unsigned long long)rec->refs);
9782                         ret = record_orphan_data_extents(root->fs_info, rec);
9783                         if (ret < 0)
9784                                 goto repair_abort;
9785                         fix = ret;
9786                         cur_err = 1;
9787                 }
9788                 if (all_backpointers_checked(rec, 1)) {
9789                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9790                                 (unsigned long long)rec->start,
9791                                 (unsigned long long)rec->nr);
9792                         fix = 1;
9793                         cur_err = 1;
9794                 }
9795                 if (!rec->owner_ref_checked) {
9796                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9797                                 (unsigned long long)rec->start,
9798                                 (unsigned long long)rec->nr);
9799                         fix = 1;
9800                         cur_err = 1;
9801                 }
9802
9803                 if (repair && fix) {
9804                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9805                         if (ret)
9806                                 goto repair_abort;
9807                 }
9808
9809
9810                 if (rec->bad_full_backref) {
9811                         fprintf(stderr, "bad full backref, on [%llu]\n",
9812                                 (unsigned long long)rec->start);
9813                         if (repair) {
9814                                 ret = fixup_extent_flags(root->fs_info, rec);
9815                                 if (ret)
9816                                         goto repair_abort;
9817                                 fix = 1;
9818                         }
9819                         cur_err = 1;
9820                 }
9821                 /*
9822                  * Although it's not a extent ref's problem, we reuse this
9823                  * routine for error reporting.
9824                  * No repair function yet.
9825                  */
9826                 if (rec->crossing_stripes) {
9827                         fprintf(stderr,
9828                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9829                                 rec->start, rec->start + rec->max_size);
9830                         cur_err = 1;
9831                 }
9832
9833                 if (rec->wrong_chunk_type) {
9834                         fprintf(stderr,
9835                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9836                                 rec->start, rec->start + rec->max_size);
9837                         cur_err = 1;
9838                 }
9839
9840                 remove_cache_extent(extent_cache, cache);
9841                 free_all_extent_backrefs(rec);
9842                 if (!init_extent_tree && repair && (!cur_err || fix))
9843                         clear_extent_dirty(root->fs_info->excluded_extents,
9844                                            rec->start,
9845                                            rec->start + rec->max_size - 1);
9846                 free(rec);
9847         }
9848 repair_abort:
9849         if (repair) {
9850                 if (ret && ret != -EAGAIN) {
9851                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9852                         exit(1);
9853                 } else if (!ret) {
9854                         struct btrfs_trans_handle *trans;
9855
9856                         root = root->fs_info->extent_root;
9857                         trans = btrfs_start_transaction(root, 1);
9858                         if (IS_ERR(trans)) {
9859                                 ret = PTR_ERR(trans);
9860                                 goto repair_abort;
9861                         }
9862
9863                         ret = btrfs_fix_block_accounting(trans, root);
9864                         if (ret)
9865                                 goto repair_abort;
9866                         ret = btrfs_commit_transaction(trans, root);
9867                         if (ret)
9868                                 goto repair_abort;
9869                 }
9870                 return ret;
9871         }
9872         return 0;
9873 }
9874
9875 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9876 {
9877         u64 stripe_size;
9878
9879         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9880                 stripe_size = length;
9881                 stripe_size /= num_stripes;
9882         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9883                 stripe_size = length * 2;
9884                 stripe_size /= num_stripes;
9885         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9886                 stripe_size = length;
9887                 stripe_size /= (num_stripes - 1);
9888         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9889                 stripe_size = length;
9890                 stripe_size /= (num_stripes - 2);
9891         } else {
9892                 stripe_size = length;
9893         }
9894         return stripe_size;
9895 }
9896
9897 /*
9898  * Check the chunk with its block group/dev list ref:
9899  * Return 0 if all refs seems valid.
9900  * Return 1 if part of refs seems valid, need later check for rebuild ref
9901  * like missing block group and needs to search extent tree to rebuild them.
9902  * Return -1 if essential refs are missing and unable to rebuild.
9903  */
9904 static int check_chunk_refs(struct chunk_record *chunk_rec,
9905                             struct block_group_tree *block_group_cache,
9906                             struct device_extent_tree *dev_extent_cache,
9907                             int silent)
9908 {
9909         struct cache_extent *block_group_item;
9910         struct block_group_record *block_group_rec;
9911         struct cache_extent *dev_extent_item;
9912         struct device_extent_record *dev_extent_rec;
9913         u64 devid;
9914         u64 offset;
9915         u64 length;
9916         int metadump_v2 = 0;
9917         int i;
9918         int ret = 0;
9919
9920         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9921                                                chunk_rec->offset,
9922                                                chunk_rec->length);
9923         if (block_group_item) {
9924                 block_group_rec = container_of(block_group_item,
9925                                                struct block_group_record,
9926                                                cache);
9927                 if (chunk_rec->length != block_group_rec->offset ||
9928                     chunk_rec->offset != block_group_rec->objectid ||
9929                     (!metadump_v2 &&
9930                      chunk_rec->type_flags != block_group_rec->flags)) {
9931                         if (!silent)
9932                                 fprintf(stderr,
9933                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9934                                         chunk_rec->objectid,
9935                                         chunk_rec->type,
9936                                         chunk_rec->offset,
9937                                         chunk_rec->length,
9938                                         chunk_rec->offset,
9939                                         chunk_rec->type_flags,
9940                                         block_group_rec->objectid,
9941                                         block_group_rec->type,
9942                                         block_group_rec->offset,
9943                                         block_group_rec->offset,
9944                                         block_group_rec->objectid,
9945                                         block_group_rec->flags);
9946                         ret = -1;
9947                 } else {
9948                         list_del_init(&block_group_rec->list);
9949                         chunk_rec->bg_rec = block_group_rec;
9950                 }
9951         } else {
9952                 if (!silent)
9953                         fprintf(stderr,
9954                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9955                                 chunk_rec->objectid,
9956                                 chunk_rec->type,
9957                                 chunk_rec->offset,
9958                                 chunk_rec->length,
9959                                 chunk_rec->offset,
9960                                 chunk_rec->type_flags);
9961                 ret = 1;
9962         }
9963
9964         if (metadump_v2)
9965                 return ret;
9966
9967         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9968                                     chunk_rec->num_stripes);
9969         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9970                 devid = chunk_rec->stripes[i].devid;
9971                 offset = chunk_rec->stripes[i].offset;
9972                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9973                                                        devid, offset, length);
9974                 if (dev_extent_item) {
9975                         dev_extent_rec = container_of(dev_extent_item,
9976                                                 struct device_extent_record,
9977                                                 cache);
9978                         if (dev_extent_rec->objectid != devid ||
9979                             dev_extent_rec->offset != offset ||
9980                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9981                             dev_extent_rec->length != length) {
9982                                 if (!silent)
9983                                         fprintf(stderr,
9984                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9985                                                 chunk_rec->objectid,
9986                                                 chunk_rec->type,
9987                                                 chunk_rec->offset,
9988                                                 chunk_rec->stripes[i].devid,
9989                                                 chunk_rec->stripes[i].offset,
9990                                                 dev_extent_rec->objectid,
9991                                                 dev_extent_rec->offset,
9992                                                 dev_extent_rec->length);
9993                                 ret = -1;
9994                         } else {
9995                                 list_move(&dev_extent_rec->chunk_list,
9996                                           &chunk_rec->dextents);
9997                         }
9998                 } else {
9999                         if (!silent)
10000                                 fprintf(stderr,
10001                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10002                                         chunk_rec->objectid,
10003                                         chunk_rec->type,
10004                                         chunk_rec->offset,
10005                                         chunk_rec->stripes[i].devid,
10006                                         chunk_rec->stripes[i].offset);
10007                         ret = -1;
10008                 }
10009         }
10010         return ret;
10011 }
10012
10013 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10014 int check_chunks(struct cache_tree *chunk_cache,
10015                  struct block_group_tree *block_group_cache,
10016                  struct device_extent_tree *dev_extent_cache,
10017                  struct list_head *good, struct list_head *bad,
10018                  struct list_head *rebuild, int silent)
10019 {
10020         struct cache_extent *chunk_item;
10021         struct chunk_record *chunk_rec;
10022         struct block_group_record *bg_rec;
10023         struct device_extent_record *dext_rec;
10024         int err;
10025         int ret = 0;
10026
10027         chunk_item = first_cache_extent(chunk_cache);
10028         while (chunk_item) {
10029                 chunk_rec = container_of(chunk_item, struct chunk_record,
10030                                          cache);
10031                 err = check_chunk_refs(chunk_rec, block_group_cache,
10032                                        dev_extent_cache, silent);
10033                 if (err < 0)
10034                         ret = err;
10035                 if (err == 0 && good)
10036                         list_add_tail(&chunk_rec->list, good);
10037                 if (err > 0 && rebuild)
10038                         list_add_tail(&chunk_rec->list, rebuild);
10039                 if (err < 0 && bad)
10040                         list_add_tail(&chunk_rec->list, bad);
10041                 chunk_item = next_cache_extent(chunk_item);
10042         }
10043
10044         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10045                 if (!silent)
10046                         fprintf(stderr,
10047                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10048                                 bg_rec->objectid,
10049                                 bg_rec->offset,
10050                                 bg_rec->flags);
10051                 if (!ret)
10052                         ret = 1;
10053         }
10054
10055         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10056                             chunk_list) {
10057                 if (!silent)
10058                         fprintf(stderr,
10059                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10060                                 dext_rec->objectid,
10061                                 dext_rec->offset,
10062                                 dext_rec->length);
10063                 if (!ret)
10064                         ret = 1;
10065         }
10066         return ret;
10067 }
10068
10069
10070 static int check_device_used(struct device_record *dev_rec,
10071                              struct device_extent_tree *dext_cache)
10072 {
10073         struct cache_extent *cache;
10074         struct device_extent_record *dev_extent_rec;
10075         u64 total_byte = 0;
10076
10077         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10078         while (cache) {
10079                 dev_extent_rec = container_of(cache,
10080                                               struct device_extent_record,
10081                                               cache);
10082                 if (dev_extent_rec->objectid != dev_rec->devid)
10083                         break;
10084
10085                 list_del_init(&dev_extent_rec->device_list);
10086                 total_byte += dev_extent_rec->length;
10087                 cache = next_cache_extent(cache);
10088         }
10089
10090         if (total_byte != dev_rec->byte_used) {
10091                 fprintf(stderr,
10092                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10093                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10094                         dev_rec->type, dev_rec->offset);
10095                 return -1;
10096         } else {
10097                 return 0;
10098         }
10099 }
10100
10101 /* check btrfs_dev_item -> btrfs_dev_extent */
10102 static int check_devices(struct rb_root *dev_cache,
10103                          struct device_extent_tree *dev_extent_cache)
10104 {
10105         struct rb_node *dev_node;
10106         struct device_record *dev_rec;
10107         struct device_extent_record *dext_rec;
10108         int err;
10109         int ret = 0;
10110
10111         dev_node = rb_first(dev_cache);
10112         while (dev_node) {
10113                 dev_rec = container_of(dev_node, struct device_record, node);
10114                 err = check_device_used(dev_rec, dev_extent_cache);
10115                 if (err)
10116                         ret = err;
10117
10118                 dev_node = rb_next(dev_node);
10119         }
10120         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10121                             device_list) {
10122                 fprintf(stderr,
10123                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10124                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10125                 if (!ret)
10126                         ret = 1;
10127         }
10128         return ret;
10129 }
10130
10131 static int add_root_item_to_list(struct list_head *head,
10132                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10133                                   u8 level, u8 drop_level,
10134                                   struct btrfs_key *drop_key)
10135 {
10136
10137         struct root_item_record *ri_rec;
10138         ri_rec = malloc(sizeof(*ri_rec));
10139         if (!ri_rec)
10140                 return -ENOMEM;
10141         ri_rec->bytenr = bytenr;
10142         ri_rec->objectid = objectid;
10143         ri_rec->level = level;
10144         ri_rec->drop_level = drop_level;
10145         ri_rec->last_snapshot = last_snapshot;
10146         if (drop_key)
10147                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10148         list_add_tail(&ri_rec->list, head);
10149
10150         return 0;
10151 }
10152
10153 static void free_root_item_list(struct list_head *list)
10154 {
10155         struct root_item_record *ri_rec;
10156
10157         while (!list_empty(list)) {
10158                 ri_rec = list_first_entry(list, struct root_item_record,
10159                                           list);
10160                 list_del_init(&ri_rec->list);
10161                 free(ri_rec);
10162         }
10163 }
10164
10165 static int deal_root_from_list(struct list_head *list,
10166                                struct btrfs_root *root,
10167                                struct block_info *bits,
10168                                int bits_nr,
10169                                struct cache_tree *pending,
10170                                struct cache_tree *seen,
10171                                struct cache_tree *reada,
10172                                struct cache_tree *nodes,
10173                                struct cache_tree *extent_cache,
10174                                struct cache_tree *chunk_cache,
10175                                struct rb_root *dev_cache,
10176                                struct block_group_tree *block_group_cache,
10177                                struct device_extent_tree *dev_extent_cache)
10178 {
10179         int ret = 0;
10180         u64 last;
10181
10182         while (!list_empty(list)) {
10183                 struct root_item_record *rec;
10184                 struct extent_buffer *buf;
10185                 rec = list_entry(list->next,
10186                                  struct root_item_record, list);
10187                 last = 0;
10188                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10189                 if (!extent_buffer_uptodate(buf)) {
10190                         free_extent_buffer(buf);
10191                         ret = -EIO;
10192                         break;
10193                 }
10194                 ret = add_root_to_pending(buf, extent_cache, pending,
10195                                     seen, nodes, rec->objectid);
10196                 if (ret < 0)
10197                         break;
10198                 /*
10199                  * To rebuild extent tree, we need deal with snapshot
10200                  * one by one, otherwise we deal with node firstly which
10201                  * can maximize readahead.
10202                  */
10203                 while (1) {
10204                         ret = run_next_block(root, bits, bits_nr, &last,
10205                                              pending, seen, reada, nodes,
10206                                              extent_cache, chunk_cache,
10207                                              dev_cache, block_group_cache,
10208                                              dev_extent_cache, rec);
10209                         if (ret != 0)
10210                                 break;
10211                 }
10212                 free_extent_buffer(buf);
10213                 list_del(&rec->list);
10214                 free(rec);
10215                 if (ret < 0)
10216                         break;
10217         }
10218         while (ret >= 0) {
10219                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10220                                      reada, nodes, extent_cache, chunk_cache,
10221                                      dev_cache, block_group_cache,
10222                                      dev_extent_cache, NULL);
10223                 if (ret != 0) {
10224                         if (ret > 0)
10225                                 ret = 0;
10226                         break;
10227                 }
10228         }
10229         return ret;
10230 }
10231
10232 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10233 {
10234         struct rb_root dev_cache;
10235         struct cache_tree chunk_cache;
10236         struct block_group_tree block_group_cache;
10237         struct device_extent_tree dev_extent_cache;
10238         struct cache_tree extent_cache;
10239         struct cache_tree seen;
10240         struct cache_tree pending;
10241         struct cache_tree reada;
10242         struct cache_tree nodes;
10243         struct extent_io_tree excluded_extents;
10244         struct cache_tree corrupt_blocks;
10245         struct btrfs_path path;
10246         struct btrfs_key key;
10247         struct btrfs_key found_key;
10248         int ret, err = 0;
10249         struct block_info *bits;
10250         int bits_nr;
10251         struct extent_buffer *leaf;
10252         int slot;
10253         struct btrfs_root_item ri;
10254         struct list_head dropping_trees;
10255         struct list_head normal_trees;
10256         struct btrfs_root *root1;
10257         struct btrfs_root *root;
10258         u64 objectid;
10259         u8 level;
10260
10261         root = fs_info->fs_root;
10262         dev_cache = RB_ROOT;
10263         cache_tree_init(&chunk_cache);
10264         block_group_tree_init(&block_group_cache);
10265         device_extent_tree_init(&dev_extent_cache);
10266
10267         cache_tree_init(&extent_cache);
10268         cache_tree_init(&seen);
10269         cache_tree_init(&pending);
10270         cache_tree_init(&nodes);
10271         cache_tree_init(&reada);
10272         cache_tree_init(&corrupt_blocks);
10273         extent_io_tree_init(&excluded_extents);
10274         INIT_LIST_HEAD(&dropping_trees);
10275         INIT_LIST_HEAD(&normal_trees);
10276
10277         if (repair) {
10278                 fs_info->excluded_extents = &excluded_extents;
10279                 fs_info->fsck_extent_cache = &extent_cache;
10280                 fs_info->free_extent_hook = free_extent_hook;
10281                 fs_info->corrupt_blocks = &corrupt_blocks;
10282         }
10283
10284         bits_nr = 1024;
10285         bits = malloc(bits_nr * sizeof(struct block_info));
10286         if (!bits) {
10287                 perror("malloc");
10288                 exit(1);
10289         }
10290
10291         if (ctx.progress_enabled) {
10292                 ctx.tp = TASK_EXTENTS;
10293                 task_start(ctx.info);
10294         }
10295
10296 again:
10297         root1 = fs_info->tree_root;
10298         level = btrfs_header_level(root1->node);
10299         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10300                                     root1->node->start, 0, level, 0, NULL);
10301         if (ret < 0)
10302                 goto out;
10303         root1 = fs_info->chunk_root;
10304         level = btrfs_header_level(root1->node);
10305         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10306                                     root1->node->start, 0, level, 0, NULL);
10307         if (ret < 0)
10308                 goto out;
10309         btrfs_init_path(&path);
10310         key.offset = 0;
10311         key.objectid = 0;
10312         key.type = BTRFS_ROOT_ITEM_KEY;
10313         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10314         if (ret < 0)
10315                 goto out;
10316         while(1) {
10317                 leaf = path.nodes[0];
10318                 slot = path.slots[0];
10319                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10320                         ret = btrfs_next_leaf(root, &path);
10321                         if (ret != 0)
10322                                 break;
10323                         leaf = path.nodes[0];
10324                         slot = path.slots[0];
10325                 }
10326                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10327                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10328                         unsigned long offset;
10329                         u64 last_snapshot;
10330
10331                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10332                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10333                         last_snapshot = btrfs_root_last_snapshot(&ri);
10334                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10335                                 level = btrfs_root_level(&ri);
10336                                 ret = add_root_item_to_list(&normal_trees,
10337                                                 found_key.objectid,
10338                                                 btrfs_root_bytenr(&ri),
10339                                                 last_snapshot, level,
10340                                                 0, NULL);
10341                                 if (ret < 0)
10342                                         goto out;
10343                         } else {
10344                                 level = btrfs_root_level(&ri);
10345                                 objectid = found_key.objectid;
10346                                 btrfs_disk_key_to_cpu(&found_key,
10347                                                       &ri.drop_progress);
10348                                 ret = add_root_item_to_list(&dropping_trees,
10349                                                 objectid,
10350                                                 btrfs_root_bytenr(&ri),
10351                                                 last_snapshot, level,
10352                                                 ri.drop_level, &found_key);
10353                                 if (ret < 0)
10354                                         goto out;
10355                         }
10356                 }
10357                 path.slots[0]++;
10358         }
10359         btrfs_release_path(&path);
10360
10361         /*
10362          * check_block can return -EAGAIN if it fixes something, please keep
10363          * this in mind when dealing with return values from these functions, if
10364          * we get -EAGAIN we want to fall through and restart the loop.
10365          */
10366         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10367                                   &seen, &reada, &nodes, &extent_cache,
10368                                   &chunk_cache, &dev_cache, &block_group_cache,
10369                                   &dev_extent_cache);
10370         if (ret < 0) {
10371                 if (ret == -EAGAIN)
10372                         goto loop;
10373                 goto out;
10374         }
10375         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10376                                   &pending, &seen, &reada, &nodes,
10377                                   &extent_cache, &chunk_cache, &dev_cache,
10378                                   &block_group_cache, &dev_extent_cache);
10379         if (ret < 0) {
10380                 if (ret == -EAGAIN)
10381                         goto loop;
10382                 goto out;
10383         }
10384
10385         ret = check_chunks(&chunk_cache, &block_group_cache,
10386                            &dev_extent_cache, NULL, NULL, NULL, 0);
10387         if (ret) {
10388                 if (ret == -EAGAIN)
10389                         goto loop;
10390                 err = ret;
10391         }
10392
10393         ret = check_extent_refs(root, &extent_cache);
10394         if (ret < 0) {
10395                 if (ret == -EAGAIN)
10396                         goto loop;
10397                 goto out;
10398         }
10399
10400         ret = check_devices(&dev_cache, &dev_extent_cache);
10401         if (ret && err)
10402                 ret = err;
10403
10404 out:
10405         task_stop(ctx.info);
10406         if (repair) {
10407                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10408                 extent_io_tree_cleanup(&excluded_extents);
10409                 fs_info->fsck_extent_cache = NULL;
10410                 fs_info->free_extent_hook = NULL;
10411                 fs_info->corrupt_blocks = NULL;
10412                 fs_info->excluded_extents = NULL;
10413         }
10414         free(bits);
10415         free_chunk_cache_tree(&chunk_cache);
10416         free_device_cache_tree(&dev_cache);
10417         free_block_group_tree(&block_group_cache);
10418         free_device_extent_tree(&dev_extent_cache);
10419         free_extent_cache_tree(&seen);
10420         free_extent_cache_tree(&pending);
10421         free_extent_cache_tree(&reada);
10422         free_extent_cache_tree(&nodes);
10423         free_root_item_list(&normal_trees);
10424         free_root_item_list(&dropping_trees);
10425         return ret;
10426 loop:
10427         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10428         free_extent_cache_tree(&seen);
10429         free_extent_cache_tree(&pending);
10430         free_extent_cache_tree(&reada);
10431         free_extent_cache_tree(&nodes);
10432         free_chunk_cache_tree(&chunk_cache);
10433         free_block_group_tree(&block_group_cache);
10434         free_device_cache_tree(&dev_cache);
10435         free_device_extent_tree(&dev_extent_cache);
10436         free_extent_record_cache(&extent_cache);
10437         free_root_item_list(&normal_trees);
10438         free_root_item_list(&dropping_trees);
10439         extent_io_tree_cleanup(&excluded_extents);
10440         goto again;
10441 }
10442
10443 /*
10444  * Check backrefs of a tree block given by @bytenr or @eb.
10445  *
10446  * @root:       the root containing the @bytenr or @eb
10447  * @eb:         tree block extent buffer, can be NULL
10448  * @bytenr:     bytenr of the tree block to search
10449  * @level:      tree level of the tree block
10450  * @owner:      owner of the tree block
10451  *
10452  * Return >0 for any error found and output error message
10453  * Return 0 for no error found
10454  */
10455 static int check_tree_block_ref(struct btrfs_root *root,
10456                                 struct extent_buffer *eb, u64 bytenr,
10457                                 int level, u64 owner)
10458 {
10459         struct btrfs_key key;
10460         struct btrfs_root *extent_root = root->fs_info->extent_root;
10461         struct btrfs_path path;
10462         struct btrfs_extent_item *ei;
10463         struct btrfs_extent_inline_ref *iref;
10464         struct extent_buffer *leaf;
10465         unsigned long end;
10466         unsigned long ptr;
10467         int slot;
10468         int skinny_level;
10469         int type;
10470         u32 nodesize = root->fs_info->nodesize;
10471         u32 item_size;
10472         u64 offset;
10473         int tree_reloc_root = 0;
10474         int found_ref = 0;
10475         int err = 0;
10476         int ret;
10477
10478         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10479             btrfs_header_bytenr(root->node) == bytenr)
10480                 tree_reloc_root = 1;
10481
10482         btrfs_init_path(&path);
10483         key.objectid = bytenr;
10484         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10485                 key.type = BTRFS_METADATA_ITEM_KEY;
10486         else
10487                 key.type = BTRFS_EXTENT_ITEM_KEY;
10488         key.offset = (u64)-1;
10489
10490         /* Search for the backref in extent tree */
10491         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10492         if (ret < 0) {
10493                 err |= BACKREF_MISSING;
10494                 goto out;
10495         }
10496         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10497         if (ret) {
10498                 err |= BACKREF_MISSING;
10499                 goto out;
10500         }
10501
10502         leaf = path.nodes[0];
10503         slot = path.slots[0];
10504         btrfs_item_key_to_cpu(leaf, &key, slot);
10505
10506         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10507
10508         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10509                 skinny_level = (int)key.offset;
10510                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10511         } else {
10512                 struct btrfs_tree_block_info *info;
10513
10514                 info = (struct btrfs_tree_block_info *)(ei + 1);
10515                 skinny_level = btrfs_tree_block_level(leaf, info);
10516                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10517         }
10518
10519         if (eb) {
10520                 u64 header_gen;
10521                 u64 extent_gen;
10522
10523                 if (!(btrfs_extent_flags(leaf, ei) &
10524                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10525                         error(
10526                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10527                                 key.objectid, nodesize,
10528                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10529                         err = BACKREF_MISMATCH;
10530                 }
10531                 header_gen = btrfs_header_generation(eb);
10532                 extent_gen = btrfs_extent_generation(leaf, ei);
10533                 if (header_gen != extent_gen) {
10534                         error(
10535         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10536                                 key.objectid, nodesize, header_gen,
10537                                 extent_gen);
10538                         err = BACKREF_MISMATCH;
10539                 }
10540                 if (level != skinny_level) {
10541                         error(
10542                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10543                                 key.objectid, nodesize, level, skinny_level);
10544                         err = BACKREF_MISMATCH;
10545                 }
10546                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10547                         error(
10548                         "extent[%llu %u] is referred by other roots than %llu",
10549                                 key.objectid, nodesize, root->objectid);
10550                         err = BACKREF_MISMATCH;
10551                 }
10552         }
10553
10554         /*
10555          * Iterate the extent/metadata item to find the exact backref
10556          */
10557         item_size = btrfs_item_size_nr(leaf, slot);
10558         ptr = (unsigned long)iref;
10559         end = (unsigned long)ei + item_size;
10560         while (ptr < end) {
10561                 iref = (struct btrfs_extent_inline_ref *)ptr;
10562                 type = btrfs_extent_inline_ref_type(leaf, iref);
10563                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10564
10565                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10566                         (offset == root->objectid || offset == owner)) {
10567                         found_ref = 1;
10568                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10569                         /*
10570                          * Backref of tree reloc root points to itself, no need
10571                          * to check backref any more.
10572                          */
10573                         if (tree_reloc_root)
10574                                 found_ref = 1;
10575                         else
10576                         /* Check if the backref points to valid referencer */
10577                                 found_ref = !check_tree_block_ref(root, NULL,
10578                                                 offset, level + 1, owner);
10579                 }
10580
10581                 if (found_ref)
10582                         break;
10583                 ptr += btrfs_extent_inline_ref_size(type);
10584         }
10585
10586         /*
10587          * Inlined extent item doesn't have what we need, check
10588          * TREE_BLOCK_REF_KEY
10589          */
10590         if (!found_ref) {
10591                 btrfs_release_path(&path);
10592                 key.objectid = bytenr;
10593                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10594                 key.offset = root->objectid;
10595
10596                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10597                 if (!ret)
10598                         found_ref = 1;
10599         }
10600         if (!found_ref)
10601                 err |= BACKREF_MISSING;
10602 out:
10603         btrfs_release_path(&path);
10604         if (eb && (err & BACKREF_MISSING))
10605                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10606                         bytenr, nodesize, owner, level);
10607         return err;
10608 }
10609
10610 /*
10611  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10612  *
10613  * Return >0 any error found and output error message
10614  * Return 0 for no error found
10615  */
10616 static int check_extent_data_item(struct btrfs_root *root,
10617                                   struct extent_buffer *eb, int slot)
10618 {
10619         struct btrfs_file_extent_item *fi;
10620         struct btrfs_path path;
10621         struct btrfs_root *extent_root = root->fs_info->extent_root;
10622         struct btrfs_key fi_key;
10623         struct btrfs_key dbref_key;
10624         struct extent_buffer *leaf;
10625         struct btrfs_extent_item *ei;
10626         struct btrfs_extent_inline_ref *iref;
10627         struct btrfs_extent_data_ref *dref;
10628         u64 owner;
10629         u64 disk_bytenr;
10630         u64 disk_num_bytes;
10631         u64 extent_num_bytes;
10632         u64 extent_flags;
10633         u32 item_size;
10634         unsigned long end;
10635         unsigned long ptr;
10636         int type;
10637         u64 ref_root;
10638         int found_dbackref = 0;
10639         int err = 0;
10640         int ret;
10641
10642         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10643         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10644
10645         /* Nothing to check for hole and inline data extents */
10646         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10647             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10648                 return 0;
10649
10650         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10651         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10652         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10653
10654         /* Check unaligned disk_num_bytes and num_bytes */
10655         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10656                 error(
10657 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10658                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10659                         root->fs_info->sectorsize);
10660                 err |= BYTES_UNALIGNED;
10661         } else {
10662                 data_bytes_allocated += disk_num_bytes;
10663         }
10664         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10665                 error(
10666 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10667                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10668                         root->fs_info->sectorsize);
10669                 err |= BYTES_UNALIGNED;
10670         } else {
10671                 data_bytes_referenced += extent_num_bytes;
10672         }
10673         owner = btrfs_header_owner(eb);
10674
10675         /* Check the extent item of the file extent in extent tree */
10676         btrfs_init_path(&path);
10677         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10678         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10679         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10680
10681         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10682         if (ret)
10683                 goto out;
10684
10685         leaf = path.nodes[0];
10686         slot = path.slots[0];
10687         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10688
10689         extent_flags = btrfs_extent_flags(leaf, ei);
10690
10691         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10692                 error(
10693                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10694                     disk_bytenr, disk_num_bytes,
10695                     BTRFS_EXTENT_FLAG_DATA);
10696                 err |= BACKREF_MISMATCH;
10697         }
10698
10699         /* Check data backref inside that extent item */
10700         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10701         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10702         ptr = (unsigned long)iref;
10703         end = (unsigned long)ei + item_size;
10704         while (ptr < end) {
10705                 iref = (struct btrfs_extent_inline_ref *)ptr;
10706                 type = btrfs_extent_inline_ref_type(leaf, iref);
10707                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10708
10709                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10710                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10711                         if (ref_root == owner || ref_root == root->objectid)
10712                                 found_dbackref = 1;
10713                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10714                         found_dbackref = !check_tree_block_ref(root, NULL,
10715                                 btrfs_extent_inline_ref_offset(leaf, iref),
10716                                 0, owner);
10717                 }
10718
10719                 if (found_dbackref)
10720                         break;
10721                 ptr += btrfs_extent_inline_ref_size(type);
10722         }
10723
10724         if (!found_dbackref) {
10725                 btrfs_release_path(&path);
10726
10727                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10728                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10729                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10730                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10731                                 fi_key.objectid, fi_key.offset);
10732
10733                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10734                                         &dbref_key, &path, 0, 0);
10735                 if (!ret) {
10736                         found_dbackref = 1;
10737                         goto out;
10738                 }
10739
10740                 btrfs_release_path(&path);
10741
10742                 /*
10743                  * Neither inlined nor EXTENT_DATA_REF found, try
10744                  * SHARED_DATA_REF as last chance.
10745                  */
10746                 dbref_key.objectid = disk_bytenr;
10747                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10748                 dbref_key.offset = eb->start;
10749
10750                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10751                                         &dbref_key, &path, 0, 0);
10752                 if (!ret) {
10753                         found_dbackref = 1;
10754                         goto out;
10755                 }
10756         }
10757
10758 out:
10759         if (!found_dbackref)
10760                 err |= BACKREF_MISSING;
10761         btrfs_release_path(&path);
10762         if (err & BACKREF_MISSING) {
10763                 error("data extent[%llu %llu] backref lost",
10764                       disk_bytenr, disk_num_bytes);
10765         }
10766         return err;
10767 }
10768
10769 /*
10770  * Get real tree block level for the case like shared block
10771  * Return >= 0 as tree level
10772  * Return <0 for error
10773  */
10774 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10775 {
10776         struct extent_buffer *eb;
10777         struct btrfs_path path;
10778         struct btrfs_key key;
10779         struct btrfs_extent_item *ei;
10780         u64 flags;
10781         u64 transid;
10782         u8 backref_level;
10783         u8 header_level;
10784         int ret;
10785
10786         /* Search extent tree for extent generation and level */
10787         key.objectid = bytenr;
10788         key.type = BTRFS_METADATA_ITEM_KEY;
10789         key.offset = (u64)-1;
10790
10791         btrfs_init_path(&path);
10792         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10793         if (ret < 0)
10794                 goto release_out;
10795         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10796         if (ret < 0)
10797                 goto release_out;
10798         if (ret > 0) {
10799                 ret = -ENOENT;
10800                 goto release_out;
10801         }
10802
10803         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10804         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10805                             struct btrfs_extent_item);
10806         flags = btrfs_extent_flags(path.nodes[0], ei);
10807         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10808                 ret = -ENOENT;
10809                 goto release_out;
10810         }
10811
10812         /* Get transid for later read_tree_block() check */
10813         transid = btrfs_extent_generation(path.nodes[0], ei);
10814
10815         /* Get backref level as one source */
10816         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10817                 backref_level = key.offset;
10818         } else {
10819                 struct btrfs_tree_block_info *info;
10820
10821                 info = (struct btrfs_tree_block_info *)(ei + 1);
10822                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10823         }
10824         btrfs_release_path(&path);
10825
10826         /* Get level from tree block as an alternative source */
10827         eb = read_tree_block(fs_info, bytenr, transid);
10828         if (!extent_buffer_uptodate(eb)) {
10829                 free_extent_buffer(eb);
10830                 return -EIO;
10831         }
10832         header_level = btrfs_header_level(eb);
10833         free_extent_buffer(eb);
10834
10835         if (header_level != backref_level)
10836                 return -EIO;
10837         return header_level;
10838
10839 release_out:
10840         btrfs_release_path(&path);
10841         return ret;
10842 }
10843
10844 /*
10845  * Check if a tree block backref is valid (points to a valid tree block)
10846  * if level == -1, level will be resolved
10847  * Return >0 for any error found and print error message
10848  */
10849 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10850                                     u64 bytenr, int level)
10851 {
10852         struct btrfs_root *root;
10853         struct btrfs_key key;
10854         struct btrfs_path path;
10855         struct extent_buffer *eb;
10856         struct extent_buffer *node;
10857         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10858         int err = 0;
10859         int ret;
10860
10861         /* Query level for level == -1 special case */
10862         if (level == -1)
10863                 level = query_tree_block_level(fs_info, bytenr);
10864         if (level < 0) {
10865                 err |= REFERENCER_MISSING;
10866                 goto out;
10867         }
10868
10869         key.objectid = root_id;
10870         key.type = BTRFS_ROOT_ITEM_KEY;
10871         key.offset = (u64)-1;
10872
10873         root = btrfs_read_fs_root(fs_info, &key);
10874         if (IS_ERR(root)) {
10875                 err |= REFERENCER_MISSING;
10876                 goto out;
10877         }
10878
10879         /* Read out the tree block to get item/node key */
10880         eb = read_tree_block(fs_info, bytenr, 0);
10881         if (!extent_buffer_uptodate(eb)) {
10882                 err |= REFERENCER_MISSING;
10883                 free_extent_buffer(eb);
10884                 goto out;
10885         }
10886
10887         /* Empty tree, no need to check key */
10888         if (!btrfs_header_nritems(eb) && !level) {
10889                 free_extent_buffer(eb);
10890                 goto out;
10891         }
10892
10893         if (level)
10894                 btrfs_node_key_to_cpu(eb, &key, 0);
10895         else
10896                 btrfs_item_key_to_cpu(eb, &key, 0);
10897
10898         free_extent_buffer(eb);
10899
10900         btrfs_init_path(&path);
10901         path.lowest_level = level;
10902         /* Search with the first key, to ensure we can reach it */
10903         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10904         if (ret < 0) {
10905                 err |= REFERENCER_MISSING;
10906                 goto release_out;
10907         }
10908
10909         node = path.nodes[level];
10910         if (btrfs_header_bytenr(node) != bytenr) {
10911                 error(
10912         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10913                         bytenr, nodesize, bytenr,
10914                         btrfs_header_bytenr(node));
10915                 err |= REFERENCER_MISMATCH;
10916         }
10917         if (btrfs_header_level(node) != level) {
10918                 error(
10919         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10920                         bytenr, nodesize, level,
10921                         btrfs_header_level(node));
10922                 err |= REFERENCER_MISMATCH;
10923         }
10924
10925 release_out:
10926         btrfs_release_path(&path);
10927 out:
10928         if (err & REFERENCER_MISSING) {
10929                 if (level < 0)
10930                         error("extent [%llu %d] lost referencer (owner: %llu)",
10931                                 bytenr, nodesize, root_id);
10932                 else
10933                         error(
10934                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10935                                 bytenr, nodesize, root_id, level);
10936         }
10937
10938         return err;
10939 }
10940
10941 /*
10942  * Check if tree block @eb is tree reloc root.
10943  * Return 0 if it's not or any problem happens
10944  * Return 1 if it's a tree reloc root
10945  */
10946 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10947                                  struct extent_buffer *eb)
10948 {
10949         struct btrfs_root *tree_reloc_root;
10950         struct btrfs_key key;
10951         u64 bytenr = btrfs_header_bytenr(eb);
10952         u64 owner = btrfs_header_owner(eb);
10953         int ret = 0;
10954
10955         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10956         key.offset = owner;
10957         key.type = BTRFS_ROOT_ITEM_KEY;
10958
10959         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10960         if (IS_ERR(tree_reloc_root))
10961                 return 0;
10962
10963         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10964                 ret = 1;
10965         btrfs_free_fs_root(tree_reloc_root);
10966         return ret;
10967 }
10968
10969 /*
10970  * Check referencer for shared block backref
10971  * If level == -1, this function will resolve the level.
10972  */
10973 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10974                                      u64 parent, u64 bytenr, int level)
10975 {
10976         struct extent_buffer *eb;
10977         u32 nr;
10978         int found_parent = 0;
10979         int i;
10980
10981         eb = read_tree_block(fs_info, parent, 0);
10982         if (!extent_buffer_uptodate(eb))
10983                 goto out;
10984
10985         if (level == -1)
10986                 level = query_tree_block_level(fs_info, bytenr);
10987         if (level < 0)
10988                 goto out;
10989
10990         /* It's possible it's a tree reloc root */
10991         if (parent == bytenr) {
10992                 if (is_tree_reloc_root(fs_info, eb))
10993                         found_parent = 1;
10994                 goto out;
10995         }
10996
10997         if (level + 1 != btrfs_header_level(eb))
10998                 goto out;
10999
11000         nr = btrfs_header_nritems(eb);
11001         for (i = 0; i < nr; i++) {
11002                 if (bytenr == btrfs_node_blockptr(eb, i)) {
11003                         found_parent = 1;
11004                         break;
11005                 }
11006         }
11007 out:
11008         free_extent_buffer(eb);
11009         if (!found_parent) {
11010                 error(
11011         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11012                         bytenr, fs_info->nodesize, parent, level);
11013                 return REFERENCER_MISSING;
11014         }
11015         return 0;
11016 }
11017
11018 /*
11019  * Check referencer for normal (inlined) data ref
11020  * If len == 0, it will be resolved by searching in extent tree
11021  */
11022 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11023                                      u64 root_id, u64 objectid, u64 offset,
11024                                      u64 bytenr, u64 len, u32 count)
11025 {
11026         struct btrfs_root *root;
11027         struct btrfs_root *extent_root = fs_info->extent_root;
11028         struct btrfs_key key;
11029         struct btrfs_path path;
11030         struct extent_buffer *leaf;
11031         struct btrfs_file_extent_item *fi;
11032         u32 found_count = 0;
11033         int slot;
11034         int ret = 0;
11035
11036         if (!len) {
11037                 key.objectid = bytenr;
11038                 key.type = BTRFS_EXTENT_ITEM_KEY;
11039                 key.offset = (u64)-1;
11040
11041                 btrfs_init_path(&path);
11042                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11043                 if (ret < 0)
11044                         goto out;
11045                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11046                 if (ret)
11047                         goto out;
11048                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11049                 if (key.objectid != bytenr ||
11050                     key.type != BTRFS_EXTENT_ITEM_KEY)
11051                         goto out;
11052                 len = key.offset;
11053                 btrfs_release_path(&path);
11054         }
11055         key.objectid = root_id;
11056         key.type = BTRFS_ROOT_ITEM_KEY;
11057         key.offset = (u64)-1;
11058         btrfs_init_path(&path);
11059
11060         root = btrfs_read_fs_root(fs_info, &key);
11061         if (IS_ERR(root))
11062                 goto out;
11063
11064         key.objectid = objectid;
11065         key.type = BTRFS_EXTENT_DATA_KEY;
11066         /*
11067          * It can be nasty as data backref offset is
11068          * file offset - file extent offset, which is smaller or
11069          * equal to original backref offset.  The only special case is
11070          * overflow.  So we need to special check and do further search.
11071          */
11072         key.offset = offset & (1ULL << 63) ? 0 : offset;
11073
11074         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11075         if (ret < 0)
11076                 goto out;
11077
11078         /*
11079          * Search afterwards to get correct one
11080          * NOTE: As we must do a comprehensive check on the data backref to
11081          * make sure the dref count also matches, we must iterate all file
11082          * extents for that inode.
11083          */
11084         while (1) {
11085                 leaf = path.nodes[0];
11086                 slot = path.slots[0];
11087
11088                 if (slot >= btrfs_header_nritems(leaf))
11089                         goto next;
11090                 btrfs_item_key_to_cpu(leaf, &key, slot);
11091                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11092                         break;
11093                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11094                 /*
11095                  * Except normal disk bytenr and disk num bytes, we still
11096                  * need to do extra check on dbackref offset as
11097                  * dbackref offset = file_offset - file_extent_offset
11098                  */
11099                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11100                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11101                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11102                     offset)
11103                         found_count++;
11104
11105 next:
11106                 ret = btrfs_next_item(root, &path);
11107                 if (ret)
11108                         break;
11109         }
11110 out:
11111         btrfs_release_path(&path);
11112         if (found_count != count) {
11113                 error(
11114 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11115                         bytenr, len, root_id, objectid, offset, count, found_count);
11116                 return REFERENCER_MISSING;
11117         }
11118         return 0;
11119 }
11120
11121 /*
11122  * Check if the referencer of a shared data backref exists
11123  */
11124 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11125                                      u64 parent, u64 bytenr)
11126 {
11127         struct extent_buffer *eb;
11128         struct btrfs_key key;
11129         struct btrfs_file_extent_item *fi;
11130         u32 nr;
11131         int found_parent = 0;
11132         int i;
11133
11134         eb = read_tree_block(fs_info, parent, 0);
11135         if (!extent_buffer_uptodate(eb))
11136                 goto out;
11137
11138         nr = btrfs_header_nritems(eb);
11139         for (i = 0; i < nr; i++) {
11140                 btrfs_item_key_to_cpu(eb, &key, i);
11141                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11142                         continue;
11143
11144                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11145                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11146                         continue;
11147
11148                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11149                         found_parent = 1;
11150                         break;
11151                 }
11152         }
11153
11154 out:
11155         free_extent_buffer(eb);
11156         if (!found_parent) {
11157                 error("shared extent %llu referencer lost (parent: %llu)",
11158                         bytenr, parent);
11159                 return REFERENCER_MISSING;
11160         }
11161         return 0;
11162 }
11163
11164 /*
11165  * This function will check a given extent item, including its backref and
11166  * itself (like crossing stripe boundary and type)
11167  *
11168  * Since we don't use extent_record anymore, introduce new error bit
11169  */
11170 static int check_extent_item(struct btrfs_fs_info *fs_info,
11171                              struct extent_buffer *eb, int slot)
11172 {
11173         struct btrfs_extent_item *ei;
11174         struct btrfs_extent_inline_ref *iref;
11175         struct btrfs_extent_data_ref *dref;
11176         unsigned long end;
11177         unsigned long ptr;
11178         int type;
11179         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11180         u32 item_size = btrfs_item_size_nr(eb, slot);
11181         u64 flags;
11182         u64 offset;
11183         int metadata = 0;
11184         int level;
11185         struct btrfs_key key;
11186         int ret;
11187         int err = 0;
11188
11189         btrfs_item_key_to_cpu(eb, &key, slot);
11190         if (key.type == BTRFS_EXTENT_ITEM_KEY)
11191                 bytes_used += key.offset;
11192         else
11193                 bytes_used += nodesize;
11194
11195         if (item_size < sizeof(*ei)) {
11196                 /*
11197                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11198                  * old thing when on disk format is still un-determined.
11199                  * No need to care about it anymore
11200                  */
11201                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11202                 return -ENOTTY;
11203         }
11204
11205         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11206         flags = btrfs_extent_flags(eb, ei);
11207
11208         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11209                 metadata = 1;
11210         if (metadata && check_crossing_stripes(global_info, key.objectid,
11211                                                eb->len)) {
11212                 error("bad metadata [%llu, %llu) crossing stripe boundary",
11213                       key.objectid, key.objectid + nodesize);
11214                 err |= CROSSING_STRIPE_BOUNDARY;
11215         }
11216
11217         ptr = (unsigned long)(ei + 1);
11218
11219         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11220                 /* Old EXTENT_ITEM metadata */
11221                 struct btrfs_tree_block_info *info;
11222
11223                 info = (struct btrfs_tree_block_info *)ptr;
11224                 level = btrfs_tree_block_level(eb, info);
11225                 ptr += sizeof(struct btrfs_tree_block_info);
11226         } else {
11227                 /* New METADATA_ITEM */
11228                 level = key.offset;
11229         }
11230         end = (unsigned long)ei + item_size;
11231
11232 next:
11233         /* Reached extent item end normally */
11234         if (ptr == end)
11235                 goto out;
11236
11237         /* Beyond extent item end, wrong item size */
11238         if (ptr > end) {
11239                 err |= ITEM_SIZE_MISMATCH;
11240                 error("extent item at bytenr %llu slot %d has wrong size",
11241                         eb->start, slot);
11242                 goto out;
11243         }
11244
11245         /* Now check every backref in this extent item */
11246         iref = (struct btrfs_extent_inline_ref *)ptr;
11247         type = btrfs_extent_inline_ref_type(eb, iref);
11248         offset = btrfs_extent_inline_ref_offset(eb, iref);
11249         switch (type) {
11250         case BTRFS_TREE_BLOCK_REF_KEY:
11251                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11252                                                level);
11253                 err |= ret;
11254                 break;
11255         case BTRFS_SHARED_BLOCK_REF_KEY:
11256                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11257                                                  level);
11258                 err |= ret;
11259                 break;
11260         case BTRFS_EXTENT_DATA_REF_KEY:
11261                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11262                 ret = check_extent_data_backref(fs_info,
11263                                 btrfs_extent_data_ref_root(eb, dref),
11264                                 btrfs_extent_data_ref_objectid(eb, dref),
11265                                 btrfs_extent_data_ref_offset(eb, dref),
11266                                 key.objectid, key.offset,
11267                                 btrfs_extent_data_ref_count(eb, dref));
11268                 err |= ret;
11269                 break;
11270         case BTRFS_SHARED_DATA_REF_KEY:
11271                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11272                 err |= ret;
11273                 break;
11274         default:
11275                 error("extent[%llu %d %llu] has unknown ref type: %d",
11276                         key.objectid, key.type, key.offset, type);
11277                 err |= UNKNOWN_TYPE;
11278                 goto out;
11279         }
11280
11281         ptr += btrfs_extent_inline_ref_size(type);
11282         goto next;
11283
11284 out:
11285         return err;
11286 }
11287
11288 /*
11289  * Check if a dev extent item is referred correctly by its chunk
11290  */
11291 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11292                                  struct extent_buffer *eb, int slot)
11293 {
11294         struct btrfs_root *chunk_root = fs_info->chunk_root;
11295         struct btrfs_dev_extent *ptr;
11296         struct btrfs_path path;
11297         struct btrfs_key chunk_key;
11298         struct btrfs_key devext_key;
11299         struct btrfs_chunk *chunk;
11300         struct extent_buffer *l;
11301         int num_stripes;
11302         u64 length;
11303         int i;
11304         int found_chunk = 0;
11305         int ret;
11306
11307         btrfs_item_key_to_cpu(eb, &devext_key, slot);
11308         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11309         length = btrfs_dev_extent_length(eb, ptr);
11310
11311         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11312         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11313         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11314
11315         btrfs_init_path(&path);
11316         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11317         if (ret)
11318                 goto out;
11319
11320         l = path.nodes[0];
11321         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11322         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11323                                       chunk_key.offset);
11324         if (ret < 0)
11325                 goto out;
11326
11327         if (btrfs_stripe_length(fs_info, l, chunk) != length)
11328                 goto out;
11329
11330         num_stripes = btrfs_chunk_num_stripes(l, chunk);
11331         for (i = 0; i < num_stripes; i++) {
11332                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11333                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11334
11335                 if (devid == devext_key.objectid &&
11336                     offset == devext_key.offset) {
11337                         found_chunk = 1;
11338                         break;
11339                 }
11340         }
11341 out:
11342         btrfs_release_path(&path);
11343         if (!found_chunk) {
11344                 error(
11345                 "device extent[%llu, %llu, %llu] did not find the related chunk",
11346                         devext_key.objectid, devext_key.offset, length);
11347                 return REFERENCER_MISSING;
11348         }
11349         return 0;
11350 }
11351
11352 /*
11353  * Check if the used space is correct with the dev item
11354  */
11355 static int check_dev_item(struct btrfs_fs_info *fs_info,
11356                           struct extent_buffer *eb, int slot)
11357 {
11358         struct btrfs_root *dev_root = fs_info->dev_root;
11359         struct btrfs_dev_item *dev_item;
11360         struct btrfs_path path;
11361         struct btrfs_key key;
11362         struct btrfs_dev_extent *ptr;
11363         u64 dev_id;
11364         u64 used;
11365         u64 total = 0;
11366         int ret;
11367
11368         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11369         dev_id = btrfs_device_id(eb, dev_item);
11370         used = btrfs_device_bytes_used(eb, dev_item);
11371
11372         key.objectid = dev_id;
11373         key.type = BTRFS_DEV_EXTENT_KEY;
11374         key.offset = 0;
11375
11376         btrfs_init_path(&path);
11377         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11378         if (ret < 0) {
11379                 btrfs_item_key_to_cpu(eb, &key, slot);
11380                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11381                         key.objectid, key.type, key.offset);
11382                 btrfs_release_path(&path);
11383                 return REFERENCER_MISSING;
11384         }
11385
11386         /* Iterate dev_extents to calculate the used space of a device */
11387         while (1) {
11388                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11389                         goto next;
11390
11391                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11392                 if (key.objectid > dev_id)
11393                         break;
11394                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11395                         goto next;
11396
11397                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11398                                      struct btrfs_dev_extent);
11399                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11400 next:
11401                 ret = btrfs_next_item(dev_root, &path);
11402                 if (ret)
11403                         break;
11404         }
11405         btrfs_release_path(&path);
11406
11407         if (used != total) {
11408                 btrfs_item_key_to_cpu(eb, &key, slot);
11409                 error(
11410 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11411                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11412                         BTRFS_DEV_EXTENT_KEY, dev_id);
11413                 return ACCOUNTING_MISMATCH;
11414         }
11415         return 0;
11416 }
11417
11418 /*
11419  * Check a block group item with its referener (chunk) and its used space
11420  * with extent/metadata item
11421  */
11422 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11423                                   struct extent_buffer *eb, int slot)
11424 {
11425         struct btrfs_root *extent_root = fs_info->extent_root;
11426         struct btrfs_root *chunk_root = fs_info->chunk_root;
11427         struct btrfs_block_group_item *bi;
11428         struct btrfs_block_group_item bg_item;
11429         struct btrfs_path path;
11430         struct btrfs_key bg_key;
11431         struct btrfs_key chunk_key;
11432         struct btrfs_key extent_key;
11433         struct btrfs_chunk *chunk;
11434         struct extent_buffer *leaf;
11435         struct btrfs_extent_item *ei;
11436         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11437         u64 flags;
11438         u64 bg_flags;
11439         u64 used;
11440         u64 total = 0;
11441         int ret;
11442         int err = 0;
11443
11444         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11445         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11446         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11447         used = btrfs_block_group_used(&bg_item);
11448         bg_flags = btrfs_block_group_flags(&bg_item);
11449
11450         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11451         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11452         chunk_key.offset = bg_key.objectid;
11453
11454         btrfs_init_path(&path);
11455         /* Search for the referencer chunk */
11456         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11457         if (ret) {
11458                 error(
11459                 "block group[%llu %llu] did not find the related chunk item",
11460                         bg_key.objectid, bg_key.offset);
11461                 err |= REFERENCER_MISSING;
11462         } else {
11463                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11464                                         struct btrfs_chunk);
11465                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11466                                                 bg_key.offset) {
11467                         error(
11468         "block group[%llu %llu] related chunk item length does not match",
11469                                 bg_key.objectid, bg_key.offset);
11470                         err |= REFERENCER_MISMATCH;
11471                 }
11472         }
11473         btrfs_release_path(&path);
11474
11475         /* Search from the block group bytenr */
11476         extent_key.objectid = bg_key.objectid;
11477         extent_key.type = 0;
11478         extent_key.offset = 0;
11479
11480         btrfs_init_path(&path);
11481         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11482         if (ret < 0)
11483                 goto out;
11484
11485         /* Iterate extent tree to account used space */
11486         while (1) {
11487                 leaf = path.nodes[0];
11488
11489                 /* Search slot can point to the last item beyond leaf nritems */
11490                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11491                         goto next;
11492
11493                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11494                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11495                         break;
11496
11497                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11498                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11499                         goto next;
11500                 if (extent_key.objectid < bg_key.objectid)
11501                         goto next;
11502
11503                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11504                         total += nodesize;
11505                 else
11506                         total += extent_key.offset;
11507
11508                 ei = btrfs_item_ptr(leaf, path.slots[0],
11509                                     struct btrfs_extent_item);
11510                 flags = btrfs_extent_flags(leaf, ei);
11511                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11512                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11513                                 error(
11514                         "bad extent[%llu, %llu) type mismatch with chunk",
11515                                         extent_key.objectid,
11516                                         extent_key.objectid + extent_key.offset);
11517                                 err |= CHUNK_TYPE_MISMATCH;
11518                         }
11519                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11520                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11521                                     BTRFS_BLOCK_GROUP_METADATA))) {
11522                                 error(
11523                         "bad extent[%llu, %llu) type mismatch with chunk",
11524                                         extent_key.objectid,
11525                                         extent_key.objectid + nodesize);
11526                                 err |= CHUNK_TYPE_MISMATCH;
11527                         }
11528                 }
11529 next:
11530                 ret = btrfs_next_item(extent_root, &path);
11531                 if (ret)
11532                         break;
11533         }
11534
11535 out:
11536         btrfs_release_path(&path);
11537
11538         if (total != used) {
11539                 error(
11540                 "block group[%llu %llu] used %llu but extent items used %llu",
11541                         bg_key.objectid, bg_key.offset, used, total);
11542                 err |= ACCOUNTING_MISMATCH;
11543         }
11544         return err;
11545 }
11546
11547 /*
11548  * Check a chunk item.
11549  * Including checking all referred dev_extents and block group
11550  */
11551 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11552                             struct extent_buffer *eb, int slot)
11553 {
11554         struct btrfs_root *extent_root = fs_info->extent_root;
11555         struct btrfs_root *dev_root = fs_info->dev_root;
11556         struct btrfs_path path;
11557         struct btrfs_key chunk_key;
11558         struct btrfs_key bg_key;
11559         struct btrfs_key devext_key;
11560         struct btrfs_chunk *chunk;
11561         struct extent_buffer *leaf;
11562         struct btrfs_block_group_item *bi;
11563         struct btrfs_block_group_item bg_item;
11564         struct btrfs_dev_extent *ptr;
11565         u64 length;
11566         u64 chunk_end;
11567         u64 stripe_len;
11568         u64 type;
11569         int num_stripes;
11570         u64 offset;
11571         u64 objectid;
11572         int i;
11573         int ret;
11574         int err = 0;
11575
11576         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11577         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11578         length = btrfs_chunk_length(eb, chunk);
11579         chunk_end = chunk_key.offset + length;
11580         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11581                                       chunk_key.offset);
11582         if (ret < 0) {
11583                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11584                         chunk_end);
11585                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11586                 goto out;
11587         }
11588         type = btrfs_chunk_type(eb, chunk);
11589
11590         bg_key.objectid = chunk_key.offset;
11591         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11592         bg_key.offset = length;
11593
11594         btrfs_init_path(&path);
11595         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11596         if (ret) {
11597                 error(
11598                 "chunk[%llu %llu) did not find the related block group item",
11599                         chunk_key.offset, chunk_end);
11600                 err |= REFERENCER_MISSING;
11601         } else{
11602                 leaf = path.nodes[0];
11603                 bi = btrfs_item_ptr(leaf, path.slots[0],
11604                                     struct btrfs_block_group_item);
11605                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11606                                    sizeof(bg_item));
11607                 if (btrfs_block_group_flags(&bg_item) != type) {
11608                         error(
11609 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11610                                 chunk_key.offset, chunk_end, type,
11611                                 btrfs_block_group_flags(&bg_item));
11612                         err |= REFERENCER_MISSING;
11613                 }
11614         }
11615
11616         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11617         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11618         for (i = 0; i < num_stripes; i++) {
11619                 btrfs_release_path(&path);
11620                 btrfs_init_path(&path);
11621                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11622                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11623                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11624
11625                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11626                                         0, 0);
11627                 if (ret)
11628                         goto not_match_dev;
11629
11630                 leaf = path.nodes[0];
11631                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11632                                      struct btrfs_dev_extent);
11633                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11634                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11635                 if (objectid != chunk_key.objectid ||
11636                     offset != chunk_key.offset ||
11637                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11638                         goto not_match_dev;
11639                 continue;
11640 not_match_dev:
11641                 err |= BACKREF_MISSING;
11642                 error(
11643                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11644                         chunk_key.objectid, chunk_end, i);
11645                 continue;
11646         }
11647         btrfs_release_path(&path);
11648 out:
11649         return err;
11650 }
11651
11652 /*
11653  * Main entry function to check known items and update related accounting info
11654  */
11655 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11656 {
11657         struct btrfs_fs_info *fs_info = root->fs_info;
11658         struct btrfs_key key;
11659         int slot = 0;
11660         int type;
11661         struct btrfs_extent_data_ref *dref;
11662         int ret;
11663         int err = 0;
11664
11665 next:
11666         btrfs_item_key_to_cpu(eb, &key, slot);
11667         type = key.type;
11668
11669         switch (type) {
11670         case BTRFS_EXTENT_DATA_KEY:
11671                 ret = check_extent_data_item(root, eb, slot);
11672                 err |= ret;
11673                 break;
11674         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11675                 ret = check_block_group_item(fs_info, eb, slot);
11676                 err |= ret;
11677                 break;
11678         case BTRFS_DEV_ITEM_KEY:
11679                 ret = check_dev_item(fs_info, eb, slot);
11680                 err |= ret;
11681                 break;
11682         case BTRFS_CHUNK_ITEM_KEY:
11683                 ret = check_chunk_item(fs_info, eb, slot);
11684                 err |= ret;
11685                 break;
11686         case BTRFS_DEV_EXTENT_KEY:
11687                 ret = check_dev_extent_item(fs_info, eb, slot);
11688                 err |= ret;
11689                 break;
11690         case BTRFS_EXTENT_ITEM_KEY:
11691         case BTRFS_METADATA_ITEM_KEY:
11692                 ret = check_extent_item(fs_info, eb, slot);
11693                 err |= ret;
11694                 break;
11695         case BTRFS_EXTENT_CSUM_KEY:
11696                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11697                 break;
11698         case BTRFS_TREE_BLOCK_REF_KEY:
11699                 ret = check_tree_block_backref(fs_info, key.offset,
11700                                                key.objectid, -1);
11701                 err |= ret;
11702                 break;
11703         case BTRFS_EXTENT_DATA_REF_KEY:
11704                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11705                 ret = check_extent_data_backref(fs_info,
11706                                 btrfs_extent_data_ref_root(eb, dref),
11707                                 btrfs_extent_data_ref_objectid(eb, dref),
11708                                 btrfs_extent_data_ref_offset(eb, dref),
11709                                 key.objectid, 0,
11710                                 btrfs_extent_data_ref_count(eb, dref));
11711                 err |= ret;
11712                 break;
11713         case BTRFS_SHARED_BLOCK_REF_KEY:
11714                 ret = check_shared_block_backref(fs_info, key.offset,
11715                                                  key.objectid, -1);
11716                 err |= ret;
11717                 break;
11718         case BTRFS_SHARED_DATA_REF_KEY:
11719                 ret = check_shared_data_backref(fs_info, key.offset,
11720                                                 key.objectid);
11721                 err |= ret;
11722                 break;
11723         default:
11724                 break;
11725         }
11726
11727         if (++slot < btrfs_header_nritems(eb))
11728                 goto next;
11729
11730         return err;
11731 }
11732
11733 /*
11734  * Helper function for later fs/subvol tree check.  To determine if a tree
11735  * block should be checked.
11736  * This function will ensure only the direct referencer with lowest rootid to
11737  * check a fs/subvolume tree block.
11738  *
11739  * Backref check at extent tree would detect errors like missing subvolume
11740  * tree, so we can do aggressive check to reduce duplicated checks.
11741  */
11742 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11743 {
11744         struct btrfs_root *extent_root = root->fs_info->extent_root;
11745         struct btrfs_key key;
11746         struct btrfs_path path;
11747         struct extent_buffer *leaf;
11748         int slot;
11749         struct btrfs_extent_item *ei;
11750         unsigned long ptr;
11751         unsigned long end;
11752         int type;
11753         u32 item_size;
11754         u64 offset;
11755         struct btrfs_extent_inline_ref *iref;
11756         int ret;
11757
11758         btrfs_init_path(&path);
11759         key.objectid = btrfs_header_bytenr(eb);
11760         key.type = BTRFS_METADATA_ITEM_KEY;
11761         key.offset = (u64)-1;
11762
11763         /*
11764          * Any failure in backref resolving means we can't determine
11765          * whom the tree block belongs to.
11766          * So in that case, we need to check that tree block
11767          */
11768         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11769         if (ret < 0)
11770                 goto need_check;
11771
11772         ret = btrfs_previous_extent_item(extent_root, &path,
11773                                          btrfs_header_bytenr(eb));
11774         if (ret)
11775                 goto need_check;
11776
11777         leaf = path.nodes[0];
11778         slot = path.slots[0];
11779         btrfs_item_key_to_cpu(leaf, &key, slot);
11780         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11781
11782         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11783                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11784         } else {
11785                 struct btrfs_tree_block_info *info;
11786
11787                 info = (struct btrfs_tree_block_info *)(ei + 1);
11788                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11789         }
11790
11791         item_size = btrfs_item_size_nr(leaf, slot);
11792         ptr = (unsigned long)iref;
11793         end = (unsigned long)ei + item_size;
11794         while (ptr < end) {
11795                 iref = (struct btrfs_extent_inline_ref *)ptr;
11796                 type = btrfs_extent_inline_ref_type(leaf, iref);
11797                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11798
11799                 /*
11800                  * We only check the tree block if current root is
11801                  * the lowest referencer of it.
11802                  */
11803                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11804                     offset < root->objectid) {
11805                         btrfs_release_path(&path);
11806                         return 0;
11807                 }
11808
11809                 ptr += btrfs_extent_inline_ref_size(type);
11810         }
11811         /*
11812          * Normally we should also check keyed tree block ref, but that may be
11813          * very time consuming.  Inlined ref should already make us skip a lot
11814          * of refs now.  So skip search keyed tree block ref.
11815          */
11816
11817 need_check:
11818         btrfs_release_path(&path);
11819         return 1;
11820 }
11821
11822 /*
11823  * Traversal function for tree block. We will do:
11824  * 1) Skip shared fs/subvolume tree blocks
11825  * 2) Update related bytes accounting
11826  * 3) Pre-order traversal
11827  */
11828 static int traverse_tree_block(struct btrfs_root *root,
11829                                 struct extent_buffer *node)
11830 {
11831         struct extent_buffer *eb;
11832         struct btrfs_key key;
11833         struct btrfs_key drop_key;
11834         int level;
11835         u64 nr;
11836         int i;
11837         int err = 0;
11838         int ret;
11839
11840         /*
11841          * Skip shared fs/subvolume tree block, in that case they will
11842          * be checked by referencer with lowest rootid
11843          */
11844         if (is_fstree(root->objectid) && !should_check(root, node))
11845                 return 0;
11846
11847         /* Update bytes accounting */
11848         total_btree_bytes += node->len;
11849         if (fs_root_objectid(btrfs_header_owner(node)))
11850                 total_fs_tree_bytes += node->len;
11851         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11852                 total_extent_tree_bytes += node->len;
11853
11854         /* pre-order tranversal, check itself first */
11855         level = btrfs_header_level(node);
11856         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11857                                    btrfs_header_level(node),
11858                                    btrfs_header_owner(node));
11859         err |= ret;
11860         if (err)
11861                 error(
11862         "check %s failed root %llu bytenr %llu level %d, force continue check",
11863                         level ? "node":"leaf", root->objectid,
11864                         btrfs_header_bytenr(node), btrfs_header_level(node));
11865
11866         if (!level) {
11867                 btree_space_waste += btrfs_leaf_free_space(root, node);
11868                 ret = check_leaf_items(root, node);
11869                 err |= ret;
11870                 return err;
11871         }
11872
11873         nr = btrfs_header_nritems(node);
11874         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11875         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11876                 sizeof(struct btrfs_key_ptr);
11877
11878         /* Then check all its children */
11879         for (i = 0; i < nr; i++) {
11880                 u64 blocknr = btrfs_node_blockptr(node, i);
11881
11882                 btrfs_node_key_to_cpu(node, &key, i);
11883                 if (level == root->root_item.drop_level &&
11884                     is_dropped_key(&key, &drop_key))
11885                         continue;
11886
11887                 /*
11888                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11889                  * to call the function itself.
11890                  */
11891                 eb = read_tree_block(root->fs_info, blocknr, 0);
11892                 if (extent_buffer_uptodate(eb)) {
11893                         ret = traverse_tree_block(root, eb);
11894                         err |= ret;
11895                 }
11896                 free_extent_buffer(eb);
11897         }
11898
11899         return err;
11900 }
11901
11902 /*
11903  * Low memory usage version check_chunks_and_extents.
11904  */
11905 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11906 {
11907         struct btrfs_path path;
11908         struct btrfs_key key;
11909         struct btrfs_root *root1;
11910         struct btrfs_root *root;
11911         struct btrfs_root *cur_root;
11912         int err = 0;
11913         int ret;
11914
11915         root = fs_info->fs_root;
11916
11917         root1 = root->fs_info->chunk_root;
11918         ret = traverse_tree_block(root1, root1->node);
11919         err |= ret;
11920
11921         root1 = root->fs_info->tree_root;
11922         ret = traverse_tree_block(root1, root1->node);
11923         err |= ret;
11924
11925         btrfs_init_path(&path);
11926         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11927         key.offset = 0;
11928         key.type = BTRFS_ROOT_ITEM_KEY;
11929
11930         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11931         if (ret) {
11932                 error("cannot find extent treet in tree_root");
11933                 goto out;
11934         }
11935
11936         while (1) {
11937                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11938                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11939                         goto next;
11940                 key.offset = (u64)-1;
11941
11942                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11943                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11944                                         &key);
11945                 else
11946                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11947                 if (IS_ERR(cur_root) || !cur_root) {
11948                         error("failed to read tree: %lld", key.objectid);
11949                         goto next;
11950                 }
11951
11952                 ret = traverse_tree_block(cur_root, cur_root->node);
11953                 err |= ret;
11954
11955                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11956                         btrfs_free_fs_root(cur_root);
11957 next:
11958                 ret = btrfs_next_item(root1, &path);
11959                 if (ret)
11960                         goto out;
11961         }
11962
11963 out:
11964         btrfs_release_path(&path);
11965         return err;
11966 }
11967
11968 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11969 {
11970         int ret;
11971
11972         if (!ctx.progress_enabled)
11973                 fprintf(stderr, "checking extents\n");
11974         if (check_mode == CHECK_MODE_LOWMEM)
11975                 ret = check_chunks_and_extents_v2(fs_info);
11976         else
11977                 ret = check_chunks_and_extents(fs_info);
11978
11979         return ret;
11980 }
11981
11982 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11983                            struct btrfs_root *root, int overwrite)
11984 {
11985         struct extent_buffer *c;
11986         struct extent_buffer *old = root->node;
11987         int level;
11988         int ret;
11989         struct btrfs_disk_key disk_key = {0,0,0};
11990
11991         level = 0;
11992
11993         if (overwrite) {
11994                 c = old;
11995                 extent_buffer_get(c);
11996                 goto init;
11997         }
11998         c = btrfs_alloc_free_block(trans, root,
11999                                    root->fs_info->nodesize,
12000                                    root->root_key.objectid,
12001                                    &disk_key, level, 0, 0);
12002         if (IS_ERR(c)) {
12003                 c = old;
12004                 extent_buffer_get(c);
12005                 overwrite = 1;
12006         }
12007 init:
12008         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12009         btrfs_set_header_level(c, level);
12010         btrfs_set_header_bytenr(c, c->start);
12011         btrfs_set_header_generation(c, trans->transid);
12012         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12013         btrfs_set_header_owner(c, root->root_key.objectid);
12014
12015         write_extent_buffer(c, root->fs_info->fsid,
12016                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
12017
12018         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12019                             btrfs_header_chunk_tree_uuid(c),
12020                             BTRFS_UUID_SIZE);
12021
12022         btrfs_mark_buffer_dirty(c);
12023         /*
12024          * this case can happen in the following case:
12025          *
12026          * 1.overwrite previous root.
12027          *
12028          * 2.reinit reloc data root, this is because we skip pin
12029          * down reloc data tree before which means we can allocate
12030          * same block bytenr here.
12031          */
12032         if (old->start == c->start) {
12033                 btrfs_set_root_generation(&root->root_item,
12034                                           trans->transid);
12035                 root->root_item.level = btrfs_header_level(root->node);
12036                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12037                                         &root->root_key, &root->root_item);
12038                 if (ret) {
12039                         free_extent_buffer(c);
12040                         return ret;
12041                 }
12042         }
12043         free_extent_buffer(old);
12044         root->node = c;
12045         add_root_to_dirty_list(root);
12046         return 0;
12047 }
12048
12049 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12050                                 struct extent_buffer *eb, int tree_root)
12051 {
12052         struct extent_buffer *tmp;
12053         struct btrfs_root_item *ri;
12054         struct btrfs_key key;
12055         u64 bytenr;
12056         int level = btrfs_header_level(eb);
12057         int nritems;
12058         int ret;
12059         int i;
12060
12061         /*
12062          * If we have pinned this block before, don't pin it again.
12063          * This can not only avoid forever loop with broken filesystem
12064          * but also give us some speedups.
12065          */
12066         if (test_range_bit(&fs_info->pinned_extents, eb->start,
12067                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12068                 return 0;
12069
12070         btrfs_pin_extent(fs_info, eb->start, eb->len);
12071
12072         nritems = btrfs_header_nritems(eb);
12073         for (i = 0; i < nritems; i++) {
12074                 if (level == 0) {
12075                         btrfs_item_key_to_cpu(eb, &key, i);
12076                         if (key.type != BTRFS_ROOT_ITEM_KEY)
12077                                 continue;
12078                         /* Skip the extent root and reloc roots */
12079                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12080                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12081                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12082                                 continue;
12083                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12084                         bytenr = btrfs_disk_root_bytenr(eb, ri);
12085
12086                         /*
12087                          * If at any point we start needing the real root we
12088                          * will have to build a stump root for the root we are
12089                          * in, but for now this doesn't actually use the root so
12090                          * just pass in extent_root.
12091                          */
12092                         tmp = read_tree_block(fs_info, bytenr, 0);
12093                         if (!extent_buffer_uptodate(tmp)) {
12094                                 fprintf(stderr, "Error reading root block\n");
12095                                 return -EIO;
12096                         }
12097                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
12098                         free_extent_buffer(tmp);
12099                         if (ret)
12100                                 return ret;
12101                 } else {
12102                         bytenr = btrfs_node_blockptr(eb, i);
12103
12104                         /* If we aren't the tree root don't read the block */
12105                         if (level == 1 && !tree_root) {
12106                                 btrfs_pin_extent(fs_info, bytenr,
12107                                                 fs_info->nodesize);
12108                                 continue;
12109                         }
12110
12111                         tmp = read_tree_block(fs_info, bytenr, 0);
12112                         if (!extent_buffer_uptodate(tmp)) {
12113                                 fprintf(stderr, "Error reading tree block\n");
12114                                 return -EIO;
12115                         }
12116                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12117                         free_extent_buffer(tmp);
12118                         if (ret)
12119                                 return ret;
12120                 }
12121         }
12122
12123         return 0;
12124 }
12125
12126 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12127 {
12128         int ret;
12129
12130         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12131         if (ret)
12132                 return ret;
12133
12134         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
12135 }
12136
12137 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12138 {
12139         struct btrfs_block_group_cache *cache;
12140         struct btrfs_path path;
12141         struct extent_buffer *leaf;
12142         struct btrfs_chunk *chunk;
12143         struct btrfs_key key;
12144         int ret;
12145         u64 start;
12146
12147         btrfs_init_path(&path);
12148         key.objectid = 0;
12149         key.type = BTRFS_CHUNK_ITEM_KEY;
12150         key.offset = 0;
12151         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12152         if (ret < 0) {
12153                 btrfs_release_path(&path);
12154                 return ret;
12155         }
12156
12157         /*
12158          * We do this in case the block groups were screwed up and had alloc
12159          * bits that aren't actually set on the chunks.  This happens with
12160          * restored images every time and could happen in real life I guess.
12161          */
12162         fs_info->avail_data_alloc_bits = 0;
12163         fs_info->avail_metadata_alloc_bits = 0;
12164         fs_info->avail_system_alloc_bits = 0;
12165
12166         /* First we need to create the in-memory block groups */
12167         while (1) {
12168                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12169                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12170                         if (ret < 0) {
12171                                 btrfs_release_path(&path);
12172                                 return ret;
12173                         }
12174                         if (ret) {
12175                                 ret = 0;
12176                                 break;
12177                         }
12178                 }
12179                 leaf = path.nodes[0];
12180                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12181                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12182                         path.slots[0]++;
12183                         continue;
12184                 }
12185
12186                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12187                 btrfs_add_block_group(fs_info, 0,
12188                                       btrfs_chunk_type(leaf, chunk),
12189                                       key.objectid, key.offset,
12190                                       btrfs_chunk_length(leaf, chunk));
12191                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12192                                  key.offset + btrfs_chunk_length(leaf, chunk));
12193                 path.slots[0]++;
12194         }
12195         start = 0;
12196         while (1) {
12197                 cache = btrfs_lookup_first_block_group(fs_info, start);
12198                 if (!cache)
12199                         break;
12200                 cache->cached = 1;
12201                 start = cache->key.objectid + cache->key.offset;
12202         }
12203
12204         btrfs_release_path(&path);
12205         return 0;
12206 }
12207
/*
 * Drop any pending balance state recorded in the tree root and reinitialize
 * the data reloc tree.
 *
 * The steps are:
 *  1. delete the balance item (BTRFS_BALANCE_OBJECTID) if it exists;
 *  2. if it existed, delete every item whose objectid is
 *     BTRFS_TREE_RELOC_OBJECTID from the tree root;
 *  3. re-create the root block of the data reloc tree and its root
 *     directory.
 *
 * Returns 0 on success or the first error encountered.
 */
static int reset_balance(struct btrfs_trans_handle *trans,
                         struct btrfs_fs_info *fs_info)
{
        struct btrfs_root *root = fs_info->tree_root;
        struct btrfs_path path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        /* del_slot is only meaningful while del_nr is non-zero */
        int del_slot, del_nr = 0;
        int ret;
        /* Set once at least one item of the current leaf has been examined */
        int found = 0;

        btrfs_init_path(&path);
        key.objectid = BTRFS_BALANCE_OBJECTID;
        key.type = BTRFS_BALANCE_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
        if (ret) {
                /* No balance item: skip straight to the data reloc tree */
                if (ret > 0)
                        ret = 0;
                if (!ret)
                        goto reinit_data_reloc;
                else
                        goto out;
        }

        ret = btrfs_del_item(trans, root, &path);
        if (ret)
                goto out;
        btrfs_release_path(&path);

        /* Now remove the root items of the tree reloc roots */
        key.objectid = BTRFS_TREE_RELOC_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
        if (ret < 0)
                goto out;
        while (1) {
                if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
                        /* Ran off the leaf without looking at any item */
                        if (!found)
                                break;

                        /* Flush the deletions batched up for this leaf */
                        if (del_nr) {
                                ret = btrfs_del_items(trans, root, &path,
                                                      del_slot, del_nr);
                                del_nr = 0;
                                if (ret)
                                        goto out;
                        }
                        /*
                         * key holds the last key read from the leaf; search
                         * again just past it to continue the scan.
                         */
                        key.offset++;
                        btrfs_release_path(&path);

                        found = 0;
                        ret = btrfs_search_slot(trans, root, &key, &path,
                                                -1, 1);
                        if (ret < 0)
                                goto out;
                        continue;
                }
                found = 1;
                leaf = path.nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
                /* Past the tree reloc objectid: nothing more to delete */
                if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
                        break;
                if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
                        path.slots[0]++;
                        continue;
                }
                /* Remember the first matching slot and count the matches */
                if (!del_nr) {
                        del_slot = path.slots[0];
                        del_nr = 1;
                } else {
                        del_nr++;
                }
                path.slots[0]++;
        }

        /* Delete whatever is still batched after leaving the loop */
        if (del_nr) {
                ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
                if (ret)
                        goto out;
        }
        btrfs_release_path(&path);

reinit_data_reloc:
        /* From here on root refers to the data reloc tree, not the tree root */
        key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = (u64)-1;
        root = btrfs_read_fs_root(fs_info, &key);
        if (IS_ERR(root)) {
                fprintf(stderr, "Error reading data reloc tree\n");
                ret = PTR_ERR(root);
                goto out;
        }
        record_root_in_trans(trans, root);
        ret = btrfs_fsck_reinit_root(trans, root, 0);
        if (ret)
                goto out;
        ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
out:
        btrfs_release_path(&path);
        return ret;
}
12310
12311 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12312                               struct btrfs_fs_info *fs_info)
12313 {
12314         u64 start = 0;
12315         int ret;
12316
12317         /*
12318          * The only reason we don't do this is because right now we're just
12319          * walking the trees we find and pinning down their bytes, we don't look
12320          * at any of the leaves.  In order to do mixed groups we'd have to check
12321          * the leaves of any fs roots and pin down the bytes for any file
12322          * extents we find.  Not hard but why do it if we don't have to?
12323          */
12324         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12325                 fprintf(stderr, "We don't support re-initing the extent tree "
12326                         "for mixed block groups yet, please notify a btrfs "
12327                         "developer you want to do this so they can add this "
12328                         "functionality.\n");
12329                 return -EINVAL;
12330         }
12331
12332         /*
12333          * first we need to walk all of the trees except the extent tree and pin
12334          * down the bytes that are in use so we don't overwrite any existing
12335          * metadata.
12336          */
12337         ret = pin_metadata_blocks(fs_info);
12338         if (ret) {
12339                 fprintf(stderr, "error pinning down used bytes\n");
12340                 return ret;
12341         }
12342
12343         /*
12344          * Need to drop all the block groups since we're going to recreate all
12345          * of them again.
12346          */
12347         btrfs_free_block_groups(fs_info);
12348         ret = reset_block_groups(fs_info);
12349         if (ret) {
12350                 fprintf(stderr, "error resetting the block groups\n");
12351                 return ret;
12352         }
12353
12354         /* Ok we can allocate now, reinit the extent root */
12355         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12356         if (ret) {
12357                 fprintf(stderr, "extent root initialization failed\n");
12358                 /*
12359                  * When the transaction code is updated we should end the
12360                  * transaction, but for now progs only knows about commit so
12361                  * just return an error.
12362                  */
12363                 return ret;
12364         }
12365
12366         /*
12367          * Now we have all the in-memory block groups setup so we can make
12368          * allocations properly, and the metadata we care about is safe since we
12369          * pinned all of it above.
12370          */
12371         while (1) {
12372                 struct btrfs_block_group_cache *cache;
12373
12374                 cache = btrfs_lookup_first_block_group(fs_info, start);
12375                 if (!cache)
12376                         break;
12377                 start = cache->key.objectid + cache->key.offset;
12378                 ret = btrfs_insert_item(trans, fs_info->extent_root,
12379                                         &cache->key, &cache->item,
12380                                         sizeof(cache->item));
12381                 if (ret) {
12382                         fprintf(stderr, "Error adding block group\n");
12383                         return ret;
12384                 }
12385                 btrfs_extent_post_op(trans, fs_info->extent_root);
12386         }
12387
12388         ret = reset_balance(trans, fs_info);
12389         if (ret)
12390                 fprintf(stderr, "error resetting the pending balance\n");
12391
12392         return ret;
12393 }
12394
12395 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12396 {
12397         struct btrfs_path path;
12398         struct btrfs_trans_handle *trans;
12399         struct btrfs_key key;
12400         int ret;
12401
12402         printf("Recowing metadata block %llu\n", eb->start);
12403         key.objectid = btrfs_header_owner(eb);
12404         key.type = BTRFS_ROOT_ITEM_KEY;
12405         key.offset = (u64)-1;
12406
12407         root = btrfs_read_fs_root(root->fs_info, &key);
12408         if (IS_ERR(root)) {
12409                 fprintf(stderr, "Couldn't find owner root %llu\n",
12410                         key.objectid);
12411                 return PTR_ERR(root);
12412         }
12413
12414         trans = btrfs_start_transaction(root, 1);
12415         if (IS_ERR(trans))
12416                 return PTR_ERR(trans);
12417
12418         btrfs_init_path(&path);
12419         path.lowest_level = btrfs_header_level(eb);
12420         if (path.lowest_level)
12421                 btrfs_node_key_to_cpu(eb, &key, 0);
12422         else
12423                 btrfs_item_key_to_cpu(eb, &key, 0);
12424
12425         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12426         btrfs_commit_transaction(trans, root);
12427         btrfs_release_path(&path);
12428         return ret;
12429 }
12430
12431 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12432 {
12433         struct btrfs_path path;
12434         struct btrfs_trans_handle *trans;
12435         struct btrfs_key key;
12436         int ret;
12437
12438         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12439                bad->key.type, bad->key.offset);
12440         key.objectid = bad->root_id;
12441         key.type = BTRFS_ROOT_ITEM_KEY;
12442         key.offset = (u64)-1;
12443
12444         root = btrfs_read_fs_root(root->fs_info, &key);
12445         if (IS_ERR(root)) {
12446                 fprintf(stderr, "Couldn't find owner root %llu\n",
12447                         key.objectid);
12448                 return PTR_ERR(root);
12449         }
12450
12451         trans = btrfs_start_transaction(root, 1);
12452         if (IS_ERR(trans))
12453                 return PTR_ERR(trans);
12454
12455         btrfs_init_path(&path);
12456         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12457         if (ret) {
12458                 if (ret > 0)
12459                         ret = 0;
12460                 goto out;
12461         }
12462         ret = btrfs_del_item(trans, root, &path);
12463 out:
12464         btrfs_commit_transaction(trans, root);
12465         btrfs_release_path(&path);
12466         return ret;
12467 }
12468
12469 static int zero_log_tree(struct btrfs_root *root)
12470 {
12471         struct btrfs_trans_handle *trans;
12472         int ret;
12473
12474         trans = btrfs_start_transaction(root, 1);
12475         if (IS_ERR(trans)) {
12476                 ret = PTR_ERR(trans);
12477                 return ret;
12478         }
12479         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12480         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12481         ret = btrfs_commit_transaction(trans, root);
12482         return ret;
12483 }
12484
12485 static int populate_csum(struct btrfs_trans_handle *trans,
12486                          struct btrfs_root *csum_root, char *buf, u64 start,
12487                          u64 len)
12488 {
12489         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12490         u64 offset = 0;
12491         u64 sectorsize;
12492         int ret = 0;
12493
12494         while (offset < len) {
12495                 sectorsize = fs_info->sectorsize;
12496                 ret = read_extent_data(fs_info, buf, start + offset,
12497                                        &sectorsize, 0);
12498                 if (ret)
12499                         break;
12500                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12501                                             start + offset, buf, sectorsize);
12502                 if (ret)
12503                         break;
12504                 offset += sectorsize;
12505         }
12506         return ret;
12507 }
12508
12509 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12510                                       struct btrfs_root *csum_root,
12511                                       struct btrfs_root *cur_root)
12512 {
12513         struct btrfs_path path;
12514         struct btrfs_key key;
12515         struct extent_buffer *node;
12516         struct btrfs_file_extent_item *fi;
12517         char *buf = NULL;
12518         u64 start = 0;
12519         u64 len = 0;
12520         int slot = 0;
12521         int ret = 0;
12522
12523         buf = malloc(cur_root->fs_info->sectorsize);
12524         if (!buf)
12525                 return -ENOMEM;
12526
12527         btrfs_init_path(&path);
12528         key.objectid = 0;
12529         key.offset = 0;
12530         key.type = 0;
12531         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12532         if (ret < 0)
12533                 goto out;
12534         /* Iterate all regular file extents and fill its csum */
12535         while (1) {
12536                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12537
12538                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12539                         goto next;
12540                 node = path.nodes[0];
12541                 slot = path.slots[0];
12542                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12543                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12544                         goto next;
12545                 start = btrfs_file_extent_disk_bytenr(node, fi);
12546                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12547
12548                 ret = populate_csum(trans, csum_root, buf, start, len);
12549                 if (ret == -EEXIST)
12550                         ret = 0;
12551                 if (ret < 0)
12552                         goto out;
12553 next:
12554                 /*
12555                  * TODO: if next leaf is corrupted, jump to nearest next valid
12556                  * leaf.
12557                  */
12558                 ret = btrfs_next_item(cur_root, &path);
12559                 if (ret < 0)
12560                         goto out;
12561                 if (ret > 0) {
12562                         ret = 0;
12563                         goto out;
12564                 }
12565         }
12566
12567 out:
12568         btrfs_release_path(&path);
12569         free(buf);
12570         return ret;
12571 }
12572
12573 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12574                                   struct btrfs_root *csum_root)
12575 {
12576         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12577         struct btrfs_path path;
12578         struct btrfs_root *tree_root = fs_info->tree_root;
12579         struct btrfs_root *cur_root;
12580         struct extent_buffer *node;
12581         struct btrfs_key key;
12582         int slot = 0;
12583         int ret = 0;
12584
12585         btrfs_init_path(&path);
12586         key.objectid = BTRFS_FS_TREE_OBJECTID;
12587         key.offset = 0;
12588         key.type = BTRFS_ROOT_ITEM_KEY;
12589         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12590         if (ret < 0)
12591                 goto out;
12592         if (ret > 0) {
12593                 ret = -ENOENT;
12594                 goto out;
12595         }
12596
12597         while (1) {
12598                 node = path.nodes[0];
12599                 slot = path.slots[0];
12600                 btrfs_item_key_to_cpu(node, &key, slot);
12601                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12602                         goto out;
12603                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12604                         goto next;
12605                 if (!is_fstree(key.objectid))
12606                         goto next;
12607                 key.offset = (u64)-1;
12608
12609                 cur_root = btrfs_read_fs_root(fs_info, &key);
12610                 if (IS_ERR(cur_root) || !cur_root) {
12611                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12612                                 key.objectid);
12613                         goto out;
12614                 }
12615                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12616                                 cur_root);
12617                 if (ret < 0)
12618                         goto out;
12619 next:
12620                 ret = btrfs_next_item(tree_root, &path);
12621                 if (ret > 0) {
12622                         ret = 0;
12623                         goto out;
12624                 }
12625                 if (ret < 0)
12626                         goto out;
12627         }
12628
12629 out:
12630         btrfs_release_path(&path);
12631         return ret;
12632 }
12633
12634 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12635                                       struct btrfs_root *csum_root)
12636 {
12637         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12638         struct btrfs_path path;
12639         struct btrfs_extent_item *ei;
12640         struct extent_buffer *leaf;
12641         char *buf;
12642         struct btrfs_key key;
12643         int ret;
12644
12645         btrfs_init_path(&path);
12646         key.objectid = 0;
12647         key.type = BTRFS_EXTENT_ITEM_KEY;
12648         key.offset = 0;
12649         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12650         if (ret < 0) {
12651                 btrfs_release_path(&path);
12652                 return ret;
12653         }
12654
12655         buf = malloc(csum_root->fs_info->sectorsize);
12656         if (!buf) {
12657                 btrfs_release_path(&path);
12658                 return -ENOMEM;
12659         }
12660
12661         while (1) {
12662                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12663                         ret = btrfs_next_leaf(extent_root, &path);
12664                         if (ret < 0)
12665                                 break;
12666                         if (ret) {
12667                                 ret = 0;
12668                                 break;
12669                         }
12670                 }
12671                 leaf = path.nodes[0];
12672
12673                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12674                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12675                         path.slots[0]++;
12676                         continue;
12677                 }
12678
12679                 ei = btrfs_item_ptr(leaf, path.slots[0],
12680                                     struct btrfs_extent_item);
12681                 if (!(btrfs_extent_flags(leaf, ei) &
12682                       BTRFS_EXTENT_FLAG_DATA)) {
12683                         path.slots[0]++;
12684                         continue;
12685                 }
12686
12687                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12688                                     key.offset);
12689                 if (ret)
12690                         break;
12691                 path.slots[0]++;
12692         }
12693
12694         btrfs_release_path(&path);
12695         free(buf);
12696         return ret;
12697 }
12698
/*
 * Recompute the checksums and store them in the csum tree.
 *
 * After an extent tree init all the extent info has been wiped, so the
 * extent tree cannot be used as the source.  In that case the caller sets
 * @search_fs_tree and the fs/subvolume trees are walked instead.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
12715
12716 static void free_roots_info_cache(void)
12717 {
12718         if (!roots_info_cache)
12719                 return;
12720
12721         while (!cache_tree_empty(roots_info_cache)) {
12722                 struct cache_extent *entry;
12723                 struct root_item_info *rii;
12724
12725                 entry = first_cache_extent(roots_info_cache);
12726                 if (!entry)
12727                         break;
12728                 remove_cache_extent(roots_info_cache, entry);
12729                 rii = container_of(entry, struct root_item_info, cache_extent);
12730                 free(rii);
12731         }
12732
12733         free(roots_info_cache);
12734         roots_info_cache = NULL;
12735 }
12736
12737 static int build_roots_info_cache(struct btrfs_fs_info *info)
12738 {
12739         int ret = 0;
12740         struct btrfs_key key;
12741         struct extent_buffer *leaf;
12742         struct btrfs_path path;
12743
12744         if (!roots_info_cache) {
12745                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12746                 if (!roots_info_cache)
12747                         return -ENOMEM;
12748                 cache_tree_init(roots_info_cache);
12749         }
12750
12751         btrfs_init_path(&path);
12752         key.objectid = 0;
12753         key.type = BTRFS_EXTENT_ITEM_KEY;
12754         key.offset = 0;
12755         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12756         if (ret < 0)
12757                 goto out;
12758         leaf = path.nodes[0];
12759
12760         while (1) {
12761                 struct btrfs_key found_key;
12762                 struct btrfs_extent_item *ei;
12763                 struct btrfs_extent_inline_ref *iref;
12764                 int slot = path.slots[0];
12765                 int type;
12766                 u64 flags;
12767                 u64 root_id;
12768                 u8 level;
12769                 struct cache_extent *entry;
12770                 struct root_item_info *rii;
12771
12772                 if (slot >= btrfs_header_nritems(leaf)) {
12773                         ret = btrfs_next_leaf(info->extent_root, &path);
12774                         if (ret < 0) {
12775                                 break;
12776                         } else if (ret) {
12777                                 ret = 0;
12778                                 break;
12779                         }
12780                         leaf = path.nodes[0];
12781                         slot = path.slots[0];
12782                 }
12783
12784                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12785
12786                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12787                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12788                         goto next;
12789
12790                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12791                 flags = btrfs_extent_flags(leaf, ei);
12792
12793                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12794                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12795                         goto next;
12796
12797                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12798                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12799                         level = found_key.offset;
12800                 } else {
12801                         struct btrfs_tree_block_info *binfo;
12802
12803                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12804                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12805                         level = btrfs_tree_block_level(leaf, binfo);
12806                 }
12807
12808                 /*
12809                  * For a root extent, it must be of the following type and the
12810                  * first (and only one) iref in the item.
12811                  */
12812                 type = btrfs_extent_inline_ref_type(leaf, iref);
12813                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12814                         goto next;
12815
12816                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12817                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12818                 if (!entry) {
12819                         rii = malloc(sizeof(struct root_item_info));
12820                         if (!rii) {
12821                                 ret = -ENOMEM;
12822                                 goto out;
12823                         }
12824                         rii->cache_extent.start = root_id;
12825                         rii->cache_extent.size = 1;
12826                         rii->level = (u8)-1;
12827                         entry = &rii->cache_extent;
12828                         ret = insert_cache_extent(roots_info_cache, entry);
12829                         ASSERT(ret == 0);
12830                 } else {
12831                         rii = container_of(entry, struct root_item_info,
12832                                            cache_extent);
12833                 }
12834
12835                 ASSERT(rii->cache_extent.start == root_id);
12836                 ASSERT(rii->cache_extent.size == 1);
12837
12838                 if (level > rii->level || rii->level == (u8)-1) {
12839                         rii->level = level;
12840                         rii->bytenr = found_key.objectid;
12841                         rii->gen = btrfs_extent_generation(leaf, ei);
12842                         rii->node_count = 1;
12843                 } else if (level == rii->level) {
12844                         rii->node_count++;
12845                 }
12846 next:
12847                 path.slots[0]++;
12848         }
12849
12850 out:
12851         btrfs_release_path(&path);
12852
12853         return ret;
12854 }
12855
12856 static int maybe_repair_root_item(struct btrfs_path *path,
12857                                   const struct btrfs_key *root_key,
12858                                   const int read_only_mode)
12859 {
12860         const u64 root_id = root_key->objectid;
12861         struct cache_extent *entry;
12862         struct root_item_info *rii;
12863         struct btrfs_root_item ri;
12864         unsigned long offset;
12865
12866         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12867         if (!entry) {
12868                 fprintf(stderr,
12869                         "Error: could not find extent items for root %llu\n",
12870                         root_key->objectid);
12871                 return -ENOENT;
12872         }
12873
12874         rii = container_of(entry, struct root_item_info, cache_extent);
12875         ASSERT(rii->cache_extent.start == root_id);
12876         ASSERT(rii->cache_extent.size == 1);
12877
12878         if (rii->node_count != 1) {
12879                 fprintf(stderr,
12880                         "Error: could not find btree root extent for root %llu\n",
12881                         root_id);
12882                 return -ENOENT;
12883         }
12884
12885         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12886         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12887
12888         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12889             btrfs_root_level(&ri) != rii->level ||
12890             btrfs_root_generation(&ri) != rii->gen) {
12891
12892                 /*
12893                  * If we're in repair mode but our caller told us to not update
12894                  * the root item, i.e. just check if it needs to be updated, don't
12895                  * print this message, since the caller will call us again shortly
12896                  * for the same root item without read only mode (the caller will
12897                  * open a transaction first).
12898                  */
12899                 if (!(read_only_mode && repair))
12900                         fprintf(stderr,
12901                                 "%sroot item for root %llu,"
12902                                 " current bytenr %llu, current gen %llu, current level %u,"
12903                                 " new bytenr %llu, new gen %llu, new level %u\n",
12904                                 (read_only_mode ? "" : "fixing "),
12905                                 root_id,
12906                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12907                                 btrfs_root_level(&ri),
12908                                 rii->bytenr, rii->gen, rii->level);
12909
12910                 if (btrfs_root_generation(&ri) > rii->gen) {
12911                         fprintf(stderr,
12912                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12913                                 root_id, btrfs_root_generation(&ri), rii->gen);
12914                         return -EINVAL;
12915                 }
12916
12917                 if (!read_only_mode) {
12918                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12919                         btrfs_set_root_level(&ri, rii->level);
12920                         btrfs_set_root_generation(&ri, rii->gen);
12921                         write_extent_buffer(path->nodes[0], &ri,
12922                                             offset, sizeof(ri));
12923                 }
12924
12925                 return 1;
12926         }
12927
12928         return 0;
12929 }
12930
12931 /*
12932  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12933  * caused read-only snapshots to be corrupted if they were created at a moment
12934  * when the source subvolume/snapshot had orphan items. The issue was that the
12935  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12936  * node instead of the post orphan cleanup root node.
12937  * So this function, and its callees, just detects and fixes those cases. Even
12938  * though the regression was for read-only snapshots, this function applies to
12939  * any snapshot/subvolume root.
12940  * This must be run before any other repair code - not doing it so, makes other
12941  * repair code delete or modify backrefs in the extent tree for example, which
12942  * will result in an inconsistent fs after repairing the root items.
12943  */
12944 static int repair_root_items(struct btrfs_fs_info *info)
12945 {
12946         struct btrfs_path path;
12947         struct btrfs_key key;
12948         struct extent_buffer *leaf;
12949         struct btrfs_trans_handle *trans = NULL;
12950         int ret = 0;
12951         int bad_roots = 0;
12952         int need_trans = 0;
12953
12954         btrfs_init_path(&path);
12955
12956         ret = build_roots_info_cache(info);
12957         if (ret)
12958                 goto out;
12959
12960         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12961         key.type = BTRFS_ROOT_ITEM_KEY;
12962         key.offset = 0;
12963
12964 again:
12965         /*
12966          * Avoid opening and committing transactions if a leaf doesn't have
12967          * any root items that need to be fixed, so that we avoid rotating
12968          * backup roots unnecessarily.
12969          */
12970         if (need_trans) {
12971                 trans = btrfs_start_transaction(info->tree_root, 1);
12972                 if (IS_ERR(trans)) {
12973                         ret = PTR_ERR(trans);
12974                         goto out;
12975                 }
12976         }
12977
12978         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12979                                 0, trans ? 1 : 0);
12980         if (ret < 0)
12981                 goto out;
12982         leaf = path.nodes[0];
12983
12984         while (1) {
12985                 struct btrfs_key found_key;
12986
12987                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12988                         int no_more_keys = find_next_key(&path, &key);
12989
12990                         btrfs_release_path(&path);
12991                         if (trans) {
12992                                 ret = btrfs_commit_transaction(trans,
12993                                                                info->tree_root);
12994                                 trans = NULL;
12995                                 if (ret < 0)
12996                                         goto out;
12997                         }
12998                         need_trans = 0;
12999                         if (no_more_keys)
13000                                 break;
13001                         goto again;
13002                 }
13003
13004                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13005
13006                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13007                         goto next;
13008                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13009                         goto next;
13010
13011                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13012                 if (ret < 0)
13013                         goto out;
13014                 if (ret) {
13015                         if (!trans && repair) {
13016                                 need_trans = 1;
13017                                 key = found_key;
13018                                 btrfs_release_path(&path);
13019                                 goto again;
13020                         }
13021                         bad_roots++;
13022                 }
13023 next:
13024                 path.slots[0]++;
13025         }
13026         ret = 0;
13027 out:
13028         free_roots_info_cache();
13029         btrfs_release_path(&path);
13030         if (trans)
13031                 btrfs_commit_transaction(trans, info->tree_root);
13032         if (ret < 0)
13033                 return ret;
13034
13035         return bad_roots;
13036 }
13037
13038 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13039 {
13040         struct btrfs_trans_handle *trans;
13041         struct btrfs_block_group_cache *bg_cache;
13042         u64 current = 0;
13043         int ret = 0;
13044
13045         /* Clear all free space cache inodes and its extent data */
13046         while (1) {
13047                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13048                 if (!bg_cache)
13049                         break;
13050                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13051                 if (ret < 0)
13052                         return ret;
13053                 current = bg_cache->key.objectid + bg_cache->key.offset;
13054         }
13055
13056         /* Don't forget to set cache_generation to -1 */
13057         trans = btrfs_start_transaction(fs_info->tree_root, 0);
13058         if (IS_ERR(trans)) {
13059                 error("failed to update super block cache generation");
13060                 return PTR_ERR(trans);
13061         }
13062         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13063         btrfs_commit_transaction(trans, fs_info->tree_root);
13064
13065         return ret;
13066 }
13067
13068 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13069                 int clear_version)
13070 {
13071         int ret = 0;
13072
13073         if (clear_version == 1) {
13074                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13075                         error(
13076                 "free space cache v2 detected, use --clear-space-cache v2");
13077                         ret = 1;
13078                         goto close_out;
13079                 }
13080                 printf("Clearing free space cache\n");
13081                 ret = clear_free_space_cache(fs_info);
13082                 if (ret) {
13083                         error("failed to clear free space cache");
13084                         ret = 1;
13085                 } else {
13086                         printf("Free space cache cleared\n");
13087                 }
13088         } else if (clear_version == 2) {
13089                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13090                         printf("no free space cache v2 to clear\n");
13091                         ret = 0;
13092                         goto close_out;
13093                 }
13094                 printf("Clear free space cache v2\n");
13095                 ret = btrfs_clear_free_space_tree(fs_info);
13096                 if (ret) {
13097                         error("failed to clear free space cache v2: %d", ret);
13098                         ret = 1;
13099                 } else {
13100                         printf("free space cache v2 cleared\n");
13101                 }
13102         }
13103 close_out:
13104         return ret;
13105 }
13106
/*
 * Help text for "btrfs check", passed to usage() by cmd_check().
 *
 * NULL-terminated list of lines: the command synopsis, a one-line summary,
 * a longer description, an empty separator line and then one or more lines
 * per supported option.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
13138
13139 int cmd_check(int argc, char **argv)
13140 {
13141         struct cache_tree root_cache;
13142         struct btrfs_root *root;
13143         struct btrfs_fs_info *info;
13144         u64 bytenr = 0;
13145         u64 subvolid = 0;
13146         u64 tree_root_bytenr = 0;
13147         u64 chunk_root_bytenr = 0;
13148         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13149         int ret = 0;
13150         int err = 0;
13151         u64 num;
13152         int init_csum_tree = 0;
13153         int readonly = 0;
13154         int clear_space_cache = 0;
13155         int qgroup_report = 0;
13156         int qgroups_repaired = 0;
13157         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13158         int force = 0;
13159
13160         while(1) {
13161                 int c;
13162                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13163                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13164                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13165                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13166                         GETOPT_VAL_FORCE };
13167                 static const struct option long_options[] = {
13168                         { "super", required_argument, NULL, 's' },
13169                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13170                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13171                         { "init-csum-tree", no_argument, NULL,
13172                                 GETOPT_VAL_INIT_CSUM },
13173                         { "init-extent-tree", no_argument, NULL,
13174                                 GETOPT_VAL_INIT_EXTENT },
13175                         { "check-data-csum", no_argument, NULL,
13176                                 GETOPT_VAL_CHECK_CSUM },
13177                         { "backup", no_argument, NULL, 'b' },
13178                         { "subvol-extents", required_argument, NULL, 'E' },
13179                         { "qgroup-report", no_argument, NULL, 'Q' },
13180                         { "tree-root", required_argument, NULL, 'r' },
13181                         { "chunk-root", required_argument, NULL,
13182                                 GETOPT_VAL_CHUNK_TREE },
13183                         { "progress", no_argument, NULL, 'p' },
13184                         { "mode", required_argument, NULL,
13185                                 GETOPT_VAL_MODE },
13186                         { "clear-space-cache", required_argument, NULL,
13187                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
13188                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13189                         { NULL, 0, NULL, 0}
13190                 };
13191
13192                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13193                 if (c < 0)
13194                         break;
13195                 switch(c) {
13196                         case 'a': /* ignored */ break;
13197                         case 'b':
13198                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13199                                 break;
13200                         case 's':
13201                                 num = arg_strtou64(optarg);
13202                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13203                                         error(
13204                                         "super mirror should be less than %d",
13205                                                 BTRFS_SUPER_MIRROR_MAX);
13206                                         exit(1);
13207                                 }
13208                                 bytenr = btrfs_sb_offset(((int)num));
13209                                 printf("using SB copy %llu, bytenr %llu\n", num,
13210                                        (unsigned long long)bytenr);
13211                                 break;
13212                         case 'Q':
13213                                 qgroup_report = 1;
13214                                 break;
13215                         case 'E':
13216                                 subvolid = arg_strtou64(optarg);
13217                                 break;
13218                         case 'r':
13219                                 tree_root_bytenr = arg_strtou64(optarg);
13220                                 break;
13221                         case GETOPT_VAL_CHUNK_TREE:
13222                                 chunk_root_bytenr = arg_strtou64(optarg);
13223                                 break;
13224                         case 'p':
13225                                 ctx.progress_enabled = true;
13226                                 break;
13227                         case '?':
13228                         case 'h':
13229                                 usage(cmd_check_usage);
13230                         case GETOPT_VAL_REPAIR:
13231                                 printf("enabling repair mode\n");
13232                                 repair = 1;
13233                                 ctree_flags |= OPEN_CTREE_WRITES;
13234                                 break;
13235                         case GETOPT_VAL_READONLY:
13236                                 readonly = 1;
13237                                 break;
13238                         case GETOPT_VAL_INIT_CSUM:
13239                                 printf("Creating a new CRC tree\n");
13240                                 init_csum_tree = 1;
13241                                 repair = 1;
13242                                 ctree_flags |= OPEN_CTREE_WRITES;
13243                                 break;
13244                         case GETOPT_VAL_INIT_EXTENT:
13245                                 init_extent_tree = 1;
13246                                 ctree_flags |= (OPEN_CTREE_WRITES |
13247                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
13248                                 repair = 1;
13249                                 break;
13250                         case GETOPT_VAL_CHECK_CSUM:
13251                                 check_data_csum = 1;
13252                                 break;
13253                         case GETOPT_VAL_MODE:
13254                                 check_mode = parse_check_mode(optarg);
13255                                 if (check_mode == CHECK_MODE_UNKNOWN) {
13256                                         error("unknown mode: %s", optarg);
13257                                         exit(1);
13258                                 }
13259                                 break;
13260                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
13261                                 if (strcmp(optarg, "v1") == 0) {
13262                                         clear_space_cache = 1;
13263                                 } else if (strcmp(optarg, "v2") == 0) {
13264                                         clear_space_cache = 2;
13265                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13266                                 } else {
13267                                         error(
13268                 "invalid argument to --clear-space-cache, must be v1 or v2");
13269                                         exit(1);
13270                                 }
13271                                 ctree_flags |= OPEN_CTREE_WRITES;
13272                                 break;
13273                         case GETOPT_VAL_FORCE:
13274                                 force = 1;
13275                                 break;
13276                 }
13277         }
13278
13279         if (check_argc_exact(argc - optind, 1))
13280                 usage(cmd_check_usage);
13281
13282         if (ctx.progress_enabled) {
13283                 ctx.tp = TASK_NOTHING;
13284                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13285         }
13286
13287         /* This check is the only reason for --readonly to exist */
13288         if (readonly && repair) {
13289                 error("repair options are not compatible with --readonly");
13290                 exit(1);
13291         }
13292
13293         /*
13294          * experimental and dangerous
13295          */
13296         if (repair && check_mode == CHECK_MODE_LOWMEM)
13297                 warning("low-memory mode repair support is only partial");
13298
13299         radix_tree_init();
13300         cache_tree_init(&root_cache);
13301
13302         ret = check_mounted(argv[optind]);
13303         if (!force) {
13304                 if (ret < 0) {
13305                         error("could not check mount status: %s",
13306                                         strerror(-ret));
13307                         err |= !!ret;
13308                         goto err_out;
13309                 } else if (ret) {
13310                         error(
13311 "%s is currently mounted, use --force if you really intend to check the filesystem",
13312                                 argv[optind]);
13313                         ret = -EBUSY;
13314                         err |= !!ret;
13315                         goto err_out;
13316                 }
13317         } else {
13318                 if (repair) {
13319                         error("repair and --force is not yet supported");
13320                         ret = 1;
13321                         err |= !!ret;
13322                         goto err_out;
13323                 }
13324                 if (ret < 0) {
13325                         warning(
13326 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13327                                 argv[optind]);
13328                 } else if (ret) {
13329                         warning(
13330                         "filesystem mounted, continuing because of --force");
13331                 }
13332                 /* A block device is mounted in exclusive mode by kernel */
13333                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13334         }
13335
13336         /* only allow partial opening under repair mode */
13337         if (repair)
13338                 ctree_flags |= OPEN_CTREE_PARTIAL;
13339
13340         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13341                                   chunk_root_bytenr, ctree_flags);
13342         if (!info) {
13343                 error("cannot open file system");
13344                 ret = -EIO;
13345                 err |= !!ret;
13346                 goto err_out;
13347         }
13348
13349         global_info = info;
13350         root = info->fs_root;
13351         uuid_unparse(info->super_copy->fsid, uuidbuf);
13352
13353         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13354
13355         /*
13356          * Check the bare minimum before starting anything else that could rely
13357          * on it, namely the tree roots, any local consistency checks
13358          */
13359         if (!extent_buffer_uptodate(info->tree_root->node) ||
13360             !extent_buffer_uptodate(info->dev_root->node) ||
13361             !extent_buffer_uptodate(info->chunk_root->node)) {
13362                 error("critical roots corrupted, unable to check the filesystem");
13363                 err |= !!ret;
13364                 ret = -EIO;
13365                 goto close_out;
13366         }
13367
13368         if (clear_space_cache) {
13369                 ret = do_clear_free_space_cache(info, clear_space_cache);
13370                 err |= !!ret;
13371                 goto close_out;
13372         }
13373
13374         /*
13375          * repair mode will force us to commit transaction which
13376          * will make us fail to load log tree when mounting.
13377          */
13378         if (repair && btrfs_super_log_root(info->super_copy)) {
13379                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13380                 if (!ret) {
13381                         ret = 1;
13382                         err |= !!ret;
13383                         goto close_out;
13384                 }
13385                 ret = zero_log_tree(root);
13386                 err |= !!ret;
13387                 if (ret) {
13388                         error("failed to zero log tree: %d", ret);
13389                         goto close_out;
13390                 }
13391         }
13392
13393         if (qgroup_report) {
13394                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13395                        uuidbuf);
13396                 ret = qgroup_verify_all(info);
13397                 err |= !!ret;
13398                 if (ret == 0)
13399                         report_qgroups(1);
13400                 goto close_out;
13401         }
13402         if (subvolid) {
13403                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13404                        subvolid, argv[optind], uuidbuf);
13405                 ret = print_extent_state(info, subvolid);
13406                 err |= !!ret;
13407                 goto close_out;
13408         }
13409
13410         if (init_extent_tree || init_csum_tree) {
13411                 struct btrfs_trans_handle *trans;
13412
13413                 trans = btrfs_start_transaction(info->extent_root, 0);
13414                 if (IS_ERR(trans)) {
13415                         error("error starting transaction");
13416                         ret = PTR_ERR(trans);
13417                         err |= !!ret;
13418                         goto close_out;
13419                 }
13420
13421                 if (init_extent_tree) {
13422                         printf("Creating a new extent tree\n");
13423                         ret = reinit_extent_tree(trans, info);
13424                         err |= !!ret;
13425                         if (ret)
13426                                 goto close_out;
13427                 }
13428
13429                 if (init_csum_tree) {
13430                         printf("Reinitialize checksum tree\n");
13431                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13432                         if (ret) {
13433                                 error("checksum tree initialization failed: %d",
13434                                                 ret);
13435                                 ret = -EIO;
13436                                 err |= !!ret;
13437                                 goto close_out;
13438                         }
13439
13440                         ret = fill_csum_tree(trans, info->csum_root,
13441                                              init_extent_tree);
13442                         err |= !!ret;
13443                         if (ret) {
13444                                 error("checksum tree refilling failed: %d", ret);
13445                                 return -EIO;
13446                         }
13447                 }
13448                 /*
13449                  * Ok now we commit and run the normal fsck, which will add
13450                  * extent entries for all of the items it finds.
13451                  */
13452                 ret = btrfs_commit_transaction(trans, info->extent_root);
13453                 err |= !!ret;
13454                 if (ret)
13455                         goto close_out;
13456         }
13457         if (!extent_buffer_uptodate(info->extent_root->node)) {
13458                 error("critical: extent_root, unable to check the filesystem");
13459                 ret = -EIO;
13460                 err |= !!ret;
13461                 goto close_out;
13462         }
13463         if (!extent_buffer_uptodate(info->csum_root->node)) {
13464                 error("critical: csum_root, unable to check the filesystem");
13465                 ret = -EIO;
13466                 err |= !!ret;
13467                 goto close_out;
13468         }
13469
13470         ret = do_check_chunks_and_extents(info);
13471         err |= !!ret;
13472         if (ret)
13473                 error(
13474                 "errors found in extent allocation tree or chunk allocation");
13475
13476         ret = repair_root_items(info);
13477         err |= !!ret;
13478         if (ret < 0) {
13479                 error("failed to repair root items: %s", strerror(-ret));
13480                 goto close_out;
13481         }
13482         if (repair) {
13483                 fprintf(stderr, "Fixed %d roots.\n", ret);
13484                 ret = 0;
13485         } else if (ret > 0) {
13486                 fprintf(stderr,
13487                        "Found %d roots with an outdated root item.\n",
13488                        ret);
13489                 fprintf(stderr,
13490                         "Please run a filesystem check with the option --repair to fix them.\n");
13491                 ret = 1;
13492                 err |= !!ret;
13493                 goto close_out;
13494         }
13495
13496         if (!ctx.progress_enabled) {
13497                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13498                         fprintf(stderr, "checking free space tree\n");
13499                 else
13500                         fprintf(stderr, "checking free space cache\n");
13501         }
13502         ret = check_space_cache(root);
13503         err |= !!ret;
13504         if (ret) {
13505                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13506                         error("errors found in free space tree");
13507                 else
13508                         error("errors found in free space cache");
13509                 goto out;
13510         }
13511
13512         /*
13513          * We used to have to have these hole extents in between our real
13514          * extents so if we don't have this flag set we need to make sure there
13515          * are no gaps in the file extents for inodes, otherwise we can just
13516          * ignore it when this happens.
13517          */
13518         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13519         ret = do_check_fs_roots(info, &root_cache);
13520         err |= !!ret;
13521         if (ret) {
13522                 error("errors found in fs roots");
13523                 goto out;
13524         }
13525
13526         fprintf(stderr, "checking csums\n");
13527         ret = check_csums(root);
13528         err |= !!ret;
13529         if (ret) {
13530                 error("errors found in csum tree");
13531                 goto out;
13532         }
13533
13534         fprintf(stderr, "checking root refs\n");
13535         /* For low memory mode, check_fs_roots_v2 handles root refs */
13536         if (check_mode != CHECK_MODE_LOWMEM) {
13537                 ret = check_root_refs(root, &root_cache);
13538                 err |= !!ret;
13539                 if (ret) {
13540                         error("errors found in root refs");
13541                         goto out;
13542                 }
13543         }
13544
13545         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13546                 struct extent_buffer *eb;
13547
13548                 eb = list_first_entry(&root->fs_info->recow_ebs,
13549                                       struct extent_buffer, recow);
13550                 list_del_init(&eb->recow);
13551                 ret = recow_extent_buffer(root, eb);
13552                 err |= !!ret;
13553                 if (ret) {
13554                         error("fails to fix transid errors");
13555                         break;
13556                 }
13557         }
13558
13559         while (!list_empty(&delete_items)) {
13560                 struct bad_item *bad;
13561
13562                 bad = list_first_entry(&delete_items, struct bad_item, list);
13563                 list_del_init(&bad->list);
13564                 if (repair) {
13565                         ret = delete_bad_item(root, bad);
13566                         err |= !!ret;
13567                 }
13568                 free(bad);
13569         }
13570
13571         if (info->quota_enabled) {
13572                 fprintf(stderr, "checking quota groups\n");
13573                 ret = qgroup_verify_all(info);
13574                 err |= !!ret;
13575                 if (ret) {
13576                         error("failed to check quota groups");
13577                         goto out;
13578                 }
13579                 report_qgroups(0);
13580                 ret = repair_qgroups(info, &qgroups_repaired);
13581                 err |= !!ret;
13582                 if (err) {
13583                         error("failed to repair quota groups");
13584                         goto out;
13585                 }
13586                 ret = 0;
13587         }
13588
13589         if (!list_empty(&root->fs_info->recow_ebs)) {
13590                 error("transid errors in file system");
13591                 ret = 1;
13592                 err |= !!ret;
13593         }
13594 out:
13595         printf("found %llu bytes used, ",
13596                (unsigned long long)bytes_used);
13597         if (err)
13598                 printf("error(s) found\n");
13599         else
13600                 printf("no error found\n");
13601         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13602         printf("total tree bytes: %llu\n",
13603                (unsigned long long)total_btree_bytes);
13604         printf("total fs tree bytes: %llu\n",
13605                (unsigned long long)total_fs_tree_bytes);
13606         printf("total extent tree bytes: %llu\n",
13607                (unsigned long long)total_extent_tree_bytes);
13608         printf("btree space waste bytes: %llu\n",
13609                (unsigned long long)btree_space_waste);
13610         printf("file data blocks allocated: %llu\n referenced %llu\n",
13611                 (unsigned long long)data_bytes_allocated,
13612                 (unsigned long long)data_bytes_referenced);
13613
13614         free_qgroup_counts();
13615         free_root_recs_tree(&root_cache);
13616 close_out:
13617         close_ctree(root);
13618 err_out:
13619         if (ctx.progress_enabled)
13620                 task_deinit(ctx.info);
13621
13622         return err;
13623 }