btrfs-progs: check: special case for last item
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases reported by the progress task.  The first three values index the
 * label table in print_status_check(); TASK_NOTHING makes that function
 * return without printing anything.
 */
47 enum task_position {
48         TASK_EXTENTS,
49         TASK_FREE_SPACE,
50         TASK_FS_ROOTS,
51         TASK_NOTHING, /* has to be the last element */
52 };
53
/* State handed to print_status_check() through its void pointer argument. */
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;  /* selects the label print_status_check() prints */
57
58         struct task_info *info; /* passed to task_period_start()/task_period_wait() */
59 };
60
/*
 * File-scope state shared by the check code.  Everything starts out
 * zero/NULL/empty.  NOTE(review): the u64 values look like running totals;
 * confirm against the code that updates them.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
/* Context for the progress task, see print_status_check() */
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementations selectable by name; parse_check_mode() lists the
 * accepted strings and returns CHECK_MODE_UNKNOWN for anything else.
 */
78 enum btrfs_check_mode {
79         CHECK_MODE_ORIGINAL,
80         CHECK_MODE_LOWMEM,
81         CHECK_MODE_UNKNOWN,
82         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
83 };
84
/* Mode in effect; starts out as the default (original) mode. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header embedded as the "node" member of both struct data_backref
 * and struct tree_backref.  compare_extent_backref() orders records by
 * is_data, then full_backref, before delegating to the type-specific
 * comparator.
 */
87 struct extent_backref {
88         struct rb_node node;
89         unsigned int is_data:1;         /* 1: data_backref, 0: tree_backref */
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
/*
 * Backref record for a data extent.  compare_data_backref() keys on
 * parent/root, owner and offset, and additionally on disk_bytenr and bytes
 * when both records have a non-zero found_ref.
 */
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;  /* a counter, distinct from the 1-bit node.found_ref */
114 };
115
/*
 * Error bits, one bit per condition.  The list starts at bit 1; bit 0 is
 * not assigned here.
 */
116 #define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM           (1<<14) /* no inode_item */
130 #define LAST_ITEM               (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
133 #define DIR_INDEX_MISSING       (1<<18) /* INODE_INDEX not found */
134 #define DIR_INDEX_MISMATCH      (1<<19) /* INODE_INDEX found but not match */
135 #define DIR_COUNT_AGAIN         (1<<20) /* DIR isize should be recalculated */
136
137 static inline struct data_backref* to_data_backref(struct extent_backref *back)
138 {
139         return container_of(back, struct data_backref, node);
140 }
141
142 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
143 {
144         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
145         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
146         struct data_backref *back1 = to_data_backref(ext1);
147         struct data_backref *back2 = to_data_backref(ext2);
148
149         WARN_ON(!ext1->is_data);
150         WARN_ON(!ext2->is_data);
151
152         /* parent and root are a union, so this covers both */
153         if (back1->parent > back2->parent)
154                 return 1;
155         if (back1->parent < back2->parent)
156                 return -1;
157
158         /* This is a full backref and the parents match. */
159         if (back1->node.full_backref)
160                 return 0;
161
162         if (back1->owner > back2->owner)
163                 return 1;
164         if (back1->owner < back2->owner)
165                 return -1;
166
167         if (back1->offset > back2->offset)
168                 return 1;
169         if (back1->offset < back2->offset)
170                 return -1;
171
172         if (back1->found_ref && back2->found_ref) {
173                 if (back1->disk_bytenr > back2->disk_bytenr)
174                         return 1;
175                 if (back1->disk_bytenr < back2->disk_bytenr)
176                         return -1;
177
178                 if (back1->bytes > back2->bytes)
179                         return 1;
180                 if (back1->bytes < back2->bytes)
181                         return -1;
182         }
183
184         return 0;
185 }
186
187 /*
188  * Much like data_backref, but with the undetermined members removed
189  * and changed to use a list_head.
190  * During extent scan, it is stored in root->orphan_data_extent.
191  * During fs tree scan, it is then moved to inode_rec->orphan_extents.
192  */
193 struct orphan_data_extent {
194         struct list_head list;
195         u64 root;
196         u64 objectid;
197         u64 offset;
198         u64 disk_bytenr;
199         u64 disk_len;
200 };
201
/*
 * Backref record for a tree block.  compare_tree_backref() keys on the
 * parent/root union only.
 */
202 struct tree_backref {
203         struct extent_backref node;
204         union {
205                 u64 parent;
206                 u64 root;
207         };
208 };
209
210 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
211 {
212         return container_of(back, struct tree_backref, node);
213 }
214
215 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
216 {
217         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
218         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
219         struct tree_backref *back1 = to_tree_backref(ext1);
220         struct tree_backref *back2 = to_tree_backref(ext2);
221
222         WARN_ON(ext1->is_data);
223         WARN_ON(ext2->is_data);
224
225         /* parent and root are a union, so this covers both */
226         if (back1->parent > back2->parent)
227                 return 1;
228         if (back1->parent < back2->parent)
229                 return -1;
230
231         return 0;
232 }
233
234 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
235 {
236         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
237         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
238
239         if (ext1->is_data > ext2->is_data)
240                 return 1;
241
242         if (ext1->is_data < ext2->is_data)
243                 return -1;
244
245         if (ext1->full_backref > ext2->full_backref)
246                 return 1;
247         if (ext1->full_backref < ext2->full_backref)
248                 return -1;
249
250         if (ext1->is_data)
251                 return compare_data_backref(node1, node2);
252         else
253                 return compare_tree_backref(node1, node2);
254 }
255
256 /* Explicit initialization for extent_record::flag_block_full_backref */
257 enum { FLAG_UNSET = 2 };
258
/*
 * Record describing one extent.  backref_tree is an rb-tree root;
 * NOTE(review): presumably it holds struct extent_backref nodes ordered by
 * compare_extent_backref() - confirm at the insertion site.
 */
259 struct extent_record {
260         struct list_head backrefs;
261         struct list_head dups;
262         struct rb_root backref_tree;
263         struct list_head list;          /* link used by to_extent_record() */
264         struct cache_extent cache;
265         struct btrfs_disk_key parent_key;
266         u64 start;
267         u64 max_size;
268         u64 nr;
269         u64 refs;
270         u64 extent_item_refs;
271         u64 generation;
272         u64 parent_generation;
273         u64 info_objectid;
274         u32 num_duplicates;
275         u8 info_level;
        /* two bits wide so that it can hold FLAG_UNSET (2) besides 0 and 1 */
276         unsigned int flag_block_full_backref:2;
277         unsigned int found_rec:1;
278         unsigned int content_checked:1;
279         unsigned int owner_ref_checked:1;
280         unsigned int is_root:1;
281         unsigned int metadata:1;
282         unsigned int bad_full_backref:1;
283         unsigned int crossing_stripes:1;
284         unsigned int wrong_chunk_type:1;
285 };
286
287 static inline struct extent_record* to_extent_record(struct list_head *entry)
288 {
289         return container_of(entry, struct extent_record, list);
290 }
291
292 struct inode_backref {
293         struct list_head list;
294         unsigned int found_dir_item:1;
295         unsigned int found_dir_index:1;
296         unsigned int found_inode_ref:1;
297         u8 filetype;
298         u8 ref_type;
299         int errors;
300         u64 dir;
301         u64 index;
302         u16 namelen;
303         char name[0];
304 };
305
306 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
307 {
308         return list_entry(entry, struct inode_backref, list);
309 }
310
/*
 * List-linked record of a root item.  NOTE(review): the fields look like
 * values copied from the on-disk root item; confirm where it is filled in.
 */
311 struct root_item_record {
312         struct list_head list;
313         u64 objectid;
314         u64 bytenr;
315         u64 last_snapshot;
316         u8 level;
317         u8 drop_level;
318         struct btrfs_key drop_key;
319 };
320
/* Backref error bits, decoded into text by print_ref_error(). */
321 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
322 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
323 #define REF_ERR_NO_INODE_REF            (1 << 2)
324 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
325 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
326 #define REF_ERR_DUP_INODE_REF           (1 << 5)
327 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
328 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
329 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
330 #define REF_ERR_NO_ROOT_REF             (1 << 9)
331 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
332 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
333 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
334
/*
 * One hole covering the byte range [start, start + len), kept in an
 * rb-tree (see compare_hole() and compare_hole_range()).
 */
335 struct file_extent_hole {
336         struct rb_node node;
337         u64 start;
338         u64 len;
339 };
340
/*
 * Per-inode state.  clone_inode_rec() produces a deep copy, including the
 * three containers (backrefs, holes, orphan_extents).
 */
341 struct inode_record {
342         struct list_head backrefs;      /* list of struct inode_backref */
343         unsigned int checked:1;
344         unsigned int merging:1;
345         unsigned int found_inode_item:1;
346         unsigned int found_dir_item:1;
347         unsigned int found_file_extent:1;
348         unsigned int found_csum_item:1;
349         unsigned int some_csum_missing:1;
350         unsigned int nodatasum:1;
351         int errors;                     /* I_ERR_* bits, see print_inode_error() */
352
353         u64 ino;
354         u32 nlink;
355         u32 imode;
356         u64 isize;
357         u64 nbytes;
358
359         u32 found_link;
360         u64 found_size;
361         u64 extent_start;
362         u64 extent_end;
363         struct rb_root holes;           /* tree of struct file_extent_hole */
364         struct list_head orphan_extents; /* list of struct orphan_data_extent */
365
366         u32 refs;                       /* set to 1 on a fresh clone */
367 };
368
/* Bits of inode_record::errors, decoded into text by print_inode_error(). */
369 #define I_ERR_NO_INODE_ITEM             (1 << 0)
370 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
371 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
372 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
373 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
374 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
375 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
376 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
377 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
378 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
379 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
380 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
381 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
382 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
383 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
384
385 struct root_backref {
386         struct list_head list;
387         unsigned int found_dir_item:1;
388         unsigned int found_dir_index:1;
389         unsigned int found_back_ref:1;
390         unsigned int found_forward_ref:1;
391         unsigned int reachable:1;
392         int errors;
393         u64 ref_root;
394         u64 dir;
395         u64 index;
396         u16 namelen;
397         char name[0];
398 };
399
400 static inline struct root_backref* to_root_backref(struct list_head *entry)
401 {
402         return list_entry(entry, struct root_backref, list);
403 }
404
/* Per-root record that can be linked into a cache tree via "cache". */
405 struct root_record {
406         struct list_head backrefs;      /* presumably struct root_backref entries */
407         struct cache_extent cache;
408         unsigned int found_root_item:1;
409         u64 objectid;
410         u32 found_ref;
411 };
412
/* Cache-tree entry that carries one untyped pointer as its payload. */
413 struct ptr_node {
414         struct cache_extent cache;
415         void *data;
416 };
417
/*
 * Cache-tree entry that owns two further cache trees (roots and inodes)
 * and a pointer to one inode_record.
 */
418 struct shared_node {
419         struct cache_extent cache;
420         struct cache_tree root_cache;
421         struct cache_tree inode_cache;
422         struct inode_record *current;
423         u32 refs;
424 };
425
/* A block described by its start and a 32-bit size. */
426 struct block_info {
427         u64 start;
428         u32 size;
429 };
430
/*
 * Tree-walk state: a cache tree plus up to BTRFS_MAX_LEVEL shared_node
 * pointers (presumably one slot per tree level - confirm at the users).
 */
431 struct walk_control {
432         struct cache_tree shared;
433         struct shared_node *nodes[BTRFS_MAX_LEVEL];
434         int active_node;
435         int root_level;
436 };
437
/* List-linked record naming an item by its key and a root id. */
438 struct bad_item {
439         struct btrfs_key key;
440         u64 root_id;
441         struct list_head list;
442 };
443
/* List-linked (bytenr, bytes) pair with two integer counters. */
444 struct extent_entry {
445         u64 bytenr;
446         u64 bytes;
447         int count;
448         int broken;
449         struct list_head list;
450 };
451
/* Information about one root, linkable into a cache tree via cache_extent. */
452 struct root_item_info {
453         /* level of the root */
454         u8 level;
455         /* number of nodes at this level, must be 1 for a root */
456         int node_count;
457         u64 bytenr;
458         u64 gen;
459         struct cache_extent cache_extent;
460 };
461
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits; they are only used
 * internally for error classification.  Every flag owns a distinct bit so
 * that the conditions stay distinguishable once OR-ed together
 * (CROSSING_STRIPE_BOUNDARY used to share bit 4 with REFERENCER_MISMATCH).
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
478
479 static void *print_status_check(void *p)
480 {
481         struct task_ctx *priv = p;
482         const char work_indicator[] = { '.', 'o', 'O', 'o' };
483         uint32_t count = 0;
484         static char *task_position_string[] = {
485                 "checking extents",
486                 "checking free space cache",
487                 "checking fs roots",
488         };
489
490         task_period_start(priv->info, 1000 /* 1s */);
491
492         if (priv->tp == TASK_NOTHING)
493                 return NULL;
494
495         while (1) {
496                 printf("%s [%c]\r", task_position_string[priv->tp],
497                                 work_indicator[count % 4]);
498                 count++;
499                 fflush(stdout);
500                 task_period_wait(priv->info);
501         }
502         return NULL;
503 }
504
/*
 * Counterpart of print_status_check(): emit a newline on stdout and flush
 * it.  The argument is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
512
513 static enum btrfs_check_mode parse_check_mode(const char *str)
514 {
515         if (strcmp(str, "lowmem") == 0)
516                 return CHECK_MODE_LOWMEM;
517         if (strcmp(str, "orig") == 0)
518                 return CHECK_MODE_ORIGINAL;
519         if (strcmp(str, "original") == 0)
520                 return CHECK_MODE_ORIGINAL;
521
522         return CHECK_MODE_UNKNOWN;
523 }
524
525 /* Compatible function to allow reuse of old codes */
526 static u64 first_extent_gap(struct rb_root *holes)
527 {
528         struct file_extent_hole *hole;
529
530         if (RB_EMPTY_ROOT(holes))
531                 return (u64)-1;
532
533         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
534         return hole->start;
535 }
536
537 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
538 {
539         struct file_extent_hole *hole1;
540         struct file_extent_hole *hole2;
541
542         hole1 = rb_entry(node1, struct file_extent_hole, node);
543         hole2 = rb_entry(node2, struct file_extent_hole, node);
544
545         if (hole1->start > hole2->start)
546                 return -1;
547         if (hole1->start < hole2->start)
548                 return 1;
549         /* Now hole1->start == hole2->start */
550         if (hole1->len >= hole2->len)
551                 /*
552                  * Hole 1 will be merge center
553                  * Same hole will be merged later
554                  */
555                 return -1;
556         /* Hole 2 will be merge center */
557         return 1;
558 }
559
560 /*
561  * Add a hole to the record
562  *
563  * This will do hole merge for copy_file_extent_holes(),
564  * which will ensure there won't be continuous holes.
565  */
566 static int add_file_extent_hole(struct rb_root *holes,
567                                 u64 start, u64 len)
568 {
569         struct file_extent_hole *hole;
570         struct file_extent_hole *prev = NULL;
571         struct file_extent_hole *next = NULL;
572
573         hole = malloc(sizeof(*hole));
574         if (!hole)
575                 return -ENOMEM;
576         hole->start = start;
577         hole->len = len;
578         /* Since compare will not return 0, no -EEXIST will happen */
579         rb_insert(holes, &hole->node, compare_hole);
580
581         /* simple merge with previous hole */
582         if (rb_prev(&hole->node))
583                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
584                                 node);
585         if (prev && prev->start + prev->len >= hole->start) {
586                 hole->len = hole->start + hole->len - prev->start;
587                 hole->start = prev->start;
588                 rb_erase(&prev->node, holes);
589                 free(prev);
590                 prev = NULL;
591         }
592
593         /* iterate merge with next holes */
594         while (1) {
595                 if (!rb_next(&hole->node))
596                         break;
597                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
598                                         node);
599                 if (hole->start + hole->len >= next->start) {
600                         if (hole->start + hole->len <= next->start + next->len)
601                                 hole->len = next->start + next->len -
602                                             hole->start;
603                         rb_erase(&next->node, holes);
604                         free(next);
605                         next = NULL;
606                 } else
607                         break;
608         }
609         return 0;
610 }
611
612 static int compare_hole_range(struct rb_node *node, void *data)
613 {
614         struct file_extent_hole *hole;
615         u64 start;
616
617         hole = (struct file_extent_hole *)data;
618         start = hole->start;
619
620         hole = rb_entry(node, struct file_extent_hole, node);
621         if (start < hole->start)
622                 return -1;
623         if (start >= hole->start && start < hole->start + hole->len)
624                 return 0;
625         return 1;
626 }
627
628 /*
629  * Delete a hole in the record
630  *
631  * This will do the hole split and is much restrict than add.
632  */
633 static int del_file_extent_hole(struct rb_root *holes,
634                                 u64 start, u64 len)
635 {
636         struct file_extent_hole *hole;
637         struct file_extent_hole tmp;
638         u64 prev_start = 0;
639         u64 prev_len = 0;
640         u64 next_start = 0;
641         u64 next_len = 0;
642         struct rb_node *node;
643         int have_prev = 0;
644         int have_next = 0;
645         int ret = 0;
646
647         tmp.start = start;
648         tmp.len = len;
649         node = rb_search(holes, &tmp, compare_hole_range, NULL);
650         if (!node)
651                 return -EEXIST;
652         hole = rb_entry(node, struct file_extent_hole, node);
653         if (start + len > hole->start + hole->len)
654                 return -EEXIST;
655
656         /*
657          * Now there will be no overlap, delete the hole and re-add the
658          * split(s) if they exists.
659          */
660         if (start > hole->start) {
661                 prev_start = hole->start;
662                 prev_len = start - hole->start;
663                 have_prev = 1;
664         }
665         if (hole->start + hole->len > start + len) {
666                 next_start = start + len;
667                 next_len = hole->start + hole->len - start - len;
668                 have_next = 1;
669         }
670         rb_erase(node, holes);
671         free(hole);
672         if (have_prev) {
673                 ret = add_file_extent_hole(holes, prev_start, prev_len);
674                 if (ret < 0)
675                         return ret;
676         }
677         if (have_next) {
678                 ret = add_file_extent_hole(holes, next_start, next_len);
679                 if (ret < 0)
680                         return ret;
681         }
682         return 0;
683 }
684
685 static int copy_file_extent_holes(struct rb_root *dst,
686                                   struct rb_root *src)
687 {
688         struct file_extent_hole *hole;
689         struct rb_node *node;
690         int ret = 0;
691
692         node = rb_first(src);
693         while (node) {
694                 hole = rb_entry(node, struct file_extent_hole, node);
695                 ret = add_file_extent_hole(dst, hole->start, hole->len);
696                 if (ret)
697                         break;
698                 node = rb_next(node);
699         }
700         return ret;
701 }
702
703 static void free_file_extent_holes(struct rb_root *holes)
704 {
705         struct rb_node *node;
706         struct file_extent_hole *hole;
707
708         node = rb_first(holes);
709         while (node) {
710                 hole = rb_entry(node, struct file_extent_hole, node);
711                 rb_erase(node, holes);
712                 free(hole);
713                 node = rb_first(holes);
714         }
715 }
716
/* Forward declaration; the function is defined further down in this file. */
717 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
718
719 static void record_root_in_trans(struct btrfs_trans_handle *trans,
720                                  struct btrfs_root *root)
721 {
722         if (root->last_trans != trans->transid) {
723                 root->track_dirty = 1;
724                 root->last_trans = trans->transid;
725                 root->commit_root = root->node;
726                 extent_buffer_get(root->node);
727         }
728 }
729
730 static u8 imode_to_type(u32 imode)
731 {
732 #define S_SHIFT 12
733         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
734                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
735                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
736                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
737                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
738                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
739                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
740                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
741         };
742
743         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
744 #undef S_SHIFT
745 }
746
747 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
748 {
749         struct device_record *rec1;
750         struct device_record *rec2;
751
752         rec1 = rb_entry(node1, struct device_record, node);
753         rec2 = rb_entry(node2, struct device_record, node);
754         if (rec1->devid > rec2->devid)
755                 return -1;
756         else if (rec1->devid < rec2->devid)
757                 return 1;
758         else
759                 return 0;
760 }
761
762 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
763 {
764         struct inode_record *rec;
765         struct inode_backref *backref;
766         struct inode_backref *orig;
767         struct inode_backref *tmp;
768         struct orphan_data_extent *src_orphan;
769         struct orphan_data_extent *dst_orphan;
770         struct rb_node *rb;
771         size_t size;
772         int ret;
773
774         rec = malloc(sizeof(*rec));
775         if (!rec)
776                 return ERR_PTR(-ENOMEM);
777         memcpy(rec, orig_rec, sizeof(*rec));
778         rec->refs = 1;
779         INIT_LIST_HEAD(&rec->backrefs);
780         INIT_LIST_HEAD(&rec->orphan_extents);
781         rec->holes = RB_ROOT;
782
783         list_for_each_entry(orig, &orig_rec->backrefs, list) {
784                 size = sizeof(*orig) + orig->namelen + 1;
785                 backref = malloc(size);
786                 if (!backref) {
787                         ret = -ENOMEM;
788                         goto cleanup;
789                 }
790                 memcpy(backref, orig, size);
791                 list_add_tail(&backref->list, &rec->backrefs);
792         }
793         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
794                 dst_orphan = malloc(sizeof(*dst_orphan));
795                 if (!dst_orphan) {
796                         ret = -ENOMEM;
797                         goto cleanup;
798                 }
799                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
800                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
801         }
802         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
803         if (ret < 0)
804                 goto cleanup_rb;
805
806         return rec;
807
808 cleanup_rb:
809         rb = rb_first(&rec->holes);
810         while (rb) {
811                 struct file_extent_hole *hole;
812
813                 hole = rb_entry(rb, struct file_extent_hole, node);
814                 rb = rb_next(rb);
815                 free(hole);
816         }
817
818 cleanup:
819         if (!list_empty(&rec->backrefs))
820                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
821                         list_del(&orig->list);
822                         free(orig);
823                 }
824
825         if (!list_empty(&rec->orphan_extents))
826                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
827                         list_del(&orig->list);
828                         free(orig);
829                 }
830
831         free(rec);
832
833         return ERR_PTR(ret);
834 }
835
836 static void print_orphan_data_extents(struct list_head *orphan_extents,
837                                       u64 objectid)
838 {
839         struct orphan_data_extent *orphan;
840
841         if (list_empty(orphan_extents))
842                 return;
843         printf("The following data extent is lost in tree %llu:\n",
844                objectid);
845         list_for_each_entry(orphan, orphan_extents, list) {
846                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
847                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
848                        orphan->disk_len);
849         }
850 }
851
852 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
853 {
854         u64 root_objectid = root->root_key.objectid;
855         int errors = rec->errors;
856
857         if (!errors)
858                 return;
859         /* reloc root errors, we print its corresponding fs root objectid*/
860         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
861                 root_objectid = root->root_key.offset;
862                 fprintf(stderr, "reloc");
863         }
864         fprintf(stderr, "root %llu inode %llu errors %x",
865                 (unsigned long long) root_objectid,
866                 (unsigned long long) rec->ino, rec->errors);
867
868         if (errors & I_ERR_NO_INODE_ITEM)
869                 fprintf(stderr, ", no inode item");
870         if (errors & I_ERR_NO_ORPHAN_ITEM)
871                 fprintf(stderr, ", no orphan item");
872         if (errors & I_ERR_DUP_INODE_ITEM)
873                 fprintf(stderr, ", dup inode item");
874         if (errors & I_ERR_DUP_DIR_INDEX)
875                 fprintf(stderr, ", dup dir index");
876         if (errors & I_ERR_ODD_DIR_ITEM)
877                 fprintf(stderr, ", odd dir item");
878         if (errors & I_ERR_ODD_FILE_EXTENT)
879                 fprintf(stderr, ", odd file extent");
880         if (errors & I_ERR_BAD_FILE_EXTENT)
881                 fprintf(stderr, ", bad file extent");
882         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
883                 fprintf(stderr, ", file extent overlap");
884         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
885                 fprintf(stderr, ", file extent discount");
886         if (errors & I_ERR_DIR_ISIZE_WRONG)
887                 fprintf(stderr, ", dir isize wrong");
888         if (errors & I_ERR_FILE_NBYTES_WRONG)
889                 fprintf(stderr, ", nbytes wrong");
890         if (errors & I_ERR_ODD_CSUM_ITEM)
891                 fprintf(stderr, ", odd csum item");
892         if (errors & I_ERR_SOME_CSUM_MISSING)
893                 fprintf(stderr, ", some csum missing");
894         if (errors & I_ERR_LINK_COUNT_WRONG)
895                 fprintf(stderr, ", link count wrong");
896         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
897                 fprintf(stderr, ", orphan file extent");
898         fprintf(stderr, "\n");
899         /* Print the orphan extents if needed */
900         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
901                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
902
903         /* Print the holes if needed */
904         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
905                 struct file_extent_hole *hole;
906                 struct rb_node *node;
907                 int found = 0;
908
909                 node = rb_first(&rec->holes);
910                 fprintf(stderr, "Found file extent holes:\n");
911                 while (node) {
912                         found = 1;
913                         hole = rb_entry(node, struct file_extent_hole, node);
914                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
915                                 hole->start, hole->len);
916                         node = rb_next(node);
917                 }
918                 if (!found)
919                         fprintf(stderr, "\tstart: 0, len: %llu\n",
920                                 round_up(rec->isize,
921                                          root->fs_info->sectorsize));
922         }
923 }
924
925 static void print_ref_error(int errors)
926 {
927         if (errors & REF_ERR_NO_DIR_ITEM)
928                 fprintf(stderr, ", no dir item");
929         if (errors & REF_ERR_NO_DIR_INDEX)
930                 fprintf(stderr, ", no dir index");
931         if (errors & REF_ERR_NO_INODE_REF)
932                 fprintf(stderr, ", no inode ref");
933         if (errors & REF_ERR_DUP_DIR_ITEM)
934                 fprintf(stderr, ", dup dir item");
935         if (errors & REF_ERR_DUP_DIR_INDEX)
936                 fprintf(stderr, ", dup dir index");
937         if (errors & REF_ERR_DUP_INODE_REF)
938                 fprintf(stderr, ", dup inode ref");
939         if (errors & REF_ERR_INDEX_UNMATCH)
940                 fprintf(stderr, ", index mismatch");
941         if (errors & REF_ERR_FILETYPE_UNMATCH)
942                 fprintf(stderr, ", filetype mismatch");
943         if (errors & REF_ERR_NAME_TOO_LONG)
944                 fprintf(stderr, ", name too long");
945         if (errors & REF_ERR_NO_ROOT_REF)
946                 fprintf(stderr, ", no root ref");
947         if (errors & REF_ERR_NO_ROOT_BACKREF)
948                 fprintf(stderr, ", no root backref");
949         if (errors & REF_ERR_DUP_ROOT_REF)
950                 fprintf(stderr, ", dup root ref");
951         if (errors & REF_ERR_DUP_ROOT_BACKREF)
952                 fprintf(stderr, ", dup root backref");
953         fprintf(stderr, "\n");
954 }
955
956 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
957                                           u64 ino, int mod)
958 {
959         struct ptr_node *node;
960         struct cache_extent *cache;
961         struct inode_record *rec = NULL;
962         int ret;
963
964         cache = lookup_cache_extent(inode_cache, ino, 1);
965         if (cache) {
966                 node = container_of(cache, struct ptr_node, cache);
967                 rec = node->data;
968                 if (mod && rec->refs > 1) {
969                         node->data = clone_inode_rec(rec);
970                         if (IS_ERR(node->data))
971                                 return node->data;
972                         rec->refs--;
973                         rec = node->data;
974                 }
975         } else if (mod) {
976                 rec = calloc(1, sizeof(*rec));
977                 if (!rec)
978                         return ERR_PTR(-ENOMEM);
979                 rec->ino = ino;
980                 rec->extent_start = (u64)-1;
981                 rec->refs = 1;
982                 INIT_LIST_HEAD(&rec->backrefs);
983                 INIT_LIST_HEAD(&rec->orphan_extents);
984                 rec->holes = RB_ROOT;
985
986                 node = malloc(sizeof(*node));
987                 if (!node) {
988                         free(rec);
989                         return ERR_PTR(-ENOMEM);
990                 }
991                 node->cache.start = ino;
992                 node->cache.size = 1;
993                 node->data = rec;
994
995                 if (ino == BTRFS_FREE_INO_OBJECTID)
996                         rec->found_link = 1;
997
998                 ret = insert_cache_extent(inode_cache, &node->cache);
999                 if (ret)
1000                         return ERR_PTR(-EEXIST);
1001         }
1002         return rec;
1003 }
1004
1005 static void free_orphan_data_extents(struct list_head *orphan_extents)
1006 {
1007         struct orphan_data_extent *orphan;
1008
1009         while (!list_empty(orphan_extents)) {
1010                 orphan = list_entry(orphan_extents->next,
1011                                     struct orphan_data_extent, list);
1012                 list_del(&orphan->list);
1013                 free(orphan);
1014         }
1015 }
1016
1017 static void free_inode_rec(struct inode_record *rec)
1018 {
1019         struct inode_backref *backref;
1020
1021         if (--rec->refs > 0)
1022                 return;
1023
1024         while (!list_empty(&rec->backrefs)) {
1025                 backref = to_inode_backref(rec->backrefs.next);
1026                 list_del(&backref->list);
1027                 free(backref);
1028         }
1029         free_orphan_data_extents(&rec->orphan_extents);
1030         free_file_extent_holes(&rec->holes);
1031         free(rec);
1032 }
1033
1034 static int can_free_inode_rec(struct inode_record *rec)
1035 {
1036         if (!rec->errors && rec->checked && rec->found_inode_item &&
1037             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1038                 return 1;
1039         return 0;
1040 }
1041
1042 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1043                                  struct inode_record *rec)
1044 {
1045         struct cache_extent *cache;
1046         struct inode_backref *tmp, *backref;
1047         struct ptr_node *node;
1048         u8 filetype;
1049
1050         if (!rec->found_inode_item)
1051                 return;
1052
1053         filetype = imode_to_type(rec->imode);
1054         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1055                 if (backref->found_dir_item && backref->found_dir_index) {
1056                         if (backref->filetype != filetype)
1057                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1058                         if (!backref->errors && backref->found_inode_ref &&
1059                             rec->nlink == rec->found_link) {
1060                                 list_del(&backref->list);
1061                                 free(backref);
1062                         }
1063                 }
1064         }
1065
1066         if (!rec->checked || rec->merging)
1067                 return;
1068
1069         if (S_ISDIR(rec->imode)) {
1070                 if (rec->found_size != rec->isize)
1071                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1072                 if (rec->found_file_extent)
1073                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1074         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1075                 if (rec->found_dir_item)
1076                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1077                 if (rec->found_size != rec->nbytes)
1078                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1079                 if (rec->nlink > 0 && !no_holes &&
1080                     (rec->extent_end < rec->isize ||
1081                      first_extent_gap(&rec->holes) < rec->isize))
1082                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1083         }
1084
1085         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1086                 if (rec->found_csum_item && rec->nodatasum)
1087                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1088                 if (rec->some_csum_missing && !rec->nodatasum)
1089                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1090         }
1091
1092         BUG_ON(rec->refs != 1);
1093         if (can_free_inode_rec(rec)) {
1094                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1095                 node = container_of(cache, struct ptr_node, cache);
1096                 BUG_ON(node->data != rec);
1097                 remove_cache_extent(inode_cache, &node->cache);
1098                 free(node);
1099                 free_inode_rec(rec);
1100         }
1101 }
1102
1103 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1104 {
1105         struct btrfs_path path;
1106         struct btrfs_key key;
1107         int ret;
1108
1109         key.objectid = BTRFS_ORPHAN_OBJECTID;
1110         key.type = BTRFS_ORPHAN_ITEM_KEY;
1111         key.offset = ino;
1112
1113         btrfs_init_path(&path);
1114         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1115         btrfs_release_path(&path);
1116         if (ret > 0)
1117                 ret = -ENOENT;
1118         return ret;
1119 }
1120
1121 static int process_inode_item(struct extent_buffer *eb,
1122                               int slot, struct btrfs_key *key,
1123                               struct shared_node *active_node)
1124 {
1125         struct inode_record *rec;
1126         struct btrfs_inode_item *item;
1127
1128         rec = active_node->current;
1129         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1130         if (rec->found_inode_item) {
1131                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1132                 return 1;
1133         }
1134         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1135         rec->nlink = btrfs_inode_nlink(eb, item);
1136         rec->isize = btrfs_inode_size(eb, item);
1137         rec->nbytes = btrfs_inode_nbytes(eb, item);
1138         rec->imode = btrfs_inode_mode(eb, item);
1139         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1140                 rec->nodatasum = 1;
1141         rec->found_inode_item = 1;
1142         if (rec->nlink == 0)
1143                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1144         maybe_free_inode_rec(&active_node->inode_cache, rec);
1145         return 0;
1146 }
1147
1148 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1149                                                 const char *name,
1150                                                 int namelen, u64 dir)
1151 {
1152         struct inode_backref *backref;
1153
1154         list_for_each_entry(backref, &rec->backrefs, list) {
1155                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1156                         break;
1157                 if (backref->dir != dir || backref->namelen != namelen)
1158                         continue;
1159                 if (memcmp(name, backref->name, namelen))
1160                         continue;
1161                 return backref;
1162         }
1163
1164         backref = malloc(sizeof(*backref) + namelen + 1);
1165         if (!backref)
1166                 return NULL;
1167         memset(backref, 0, sizeof(*backref));
1168         backref->dir = dir;
1169         backref->namelen = namelen;
1170         memcpy(backref->name, name, namelen);
1171         backref->name[namelen] = '\0';
1172         list_add_tail(&backref->list, &rec->backrefs);
1173         return backref;
1174 }
1175
1176 static int add_inode_backref(struct cache_tree *inode_cache,
1177                              u64 ino, u64 dir, u64 index,
1178                              const char *name, int namelen,
1179                              u8 filetype, u8 itemtype, int errors)
1180 {
1181         struct inode_record *rec;
1182         struct inode_backref *backref;
1183
1184         rec = get_inode_rec(inode_cache, ino, 1);
1185         BUG_ON(IS_ERR(rec));
1186         backref = get_inode_backref(rec, name, namelen, dir);
1187         BUG_ON(!backref);
1188         if (errors)
1189                 backref->errors |= errors;
1190         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1191                 if (backref->found_dir_index)
1192                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1193                 if (backref->found_inode_ref && backref->index != index)
1194                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1195                 if (backref->found_dir_item && backref->filetype != filetype)
1196                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1197
1198                 backref->index = index;
1199                 backref->filetype = filetype;
1200                 backref->found_dir_index = 1;
1201         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1202                 rec->found_link++;
1203                 if (backref->found_dir_item)
1204                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1205                 if (backref->found_dir_index && backref->filetype != filetype)
1206                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1207
1208                 backref->filetype = filetype;
1209                 backref->found_dir_item = 1;
1210         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1211                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1212                 if (backref->found_inode_ref)
1213                         backref->errors |= REF_ERR_DUP_INODE_REF;
1214                 if (backref->found_dir_index && backref->index != index)
1215                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1216                 else
1217                         backref->index = index;
1218
1219                 backref->ref_type = itemtype;
1220                 backref->found_inode_ref = 1;
1221         } else {
1222                 BUG_ON(1);
1223         }
1224
1225         maybe_free_inode_rec(inode_cache, rec);
1226         return 0;
1227 }
1228
1229 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1230                             struct cache_tree *dst_cache)
1231 {
1232         struct inode_backref *backref;
1233         u32 dir_count = 0;
1234         int ret = 0;
1235
1236         dst->merging = 1;
1237         list_for_each_entry(backref, &src->backrefs, list) {
1238                 if (backref->found_dir_index) {
1239                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1240                                         backref->index, backref->name,
1241                                         backref->namelen, backref->filetype,
1242                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1243                 }
1244                 if (backref->found_dir_item) {
1245                         dir_count++;
1246                         add_inode_backref(dst_cache, dst->ino,
1247                                         backref->dir, 0, backref->name,
1248                                         backref->namelen, backref->filetype,
1249                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1250                 }
1251                 if (backref->found_inode_ref) {
1252                         add_inode_backref(dst_cache, dst->ino,
1253                                         backref->dir, backref->index,
1254                                         backref->name, backref->namelen, 0,
1255                                         backref->ref_type, backref->errors);
1256                 }
1257         }
1258
1259         if (src->found_dir_item)
1260                 dst->found_dir_item = 1;
1261         if (src->found_file_extent)
1262                 dst->found_file_extent = 1;
1263         if (src->found_csum_item)
1264                 dst->found_csum_item = 1;
1265         if (src->some_csum_missing)
1266                 dst->some_csum_missing = 1;
1267         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1268                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1269                 if (ret < 0)
1270                         return ret;
1271         }
1272
1273         BUG_ON(src->found_link < dir_count);
1274         dst->found_link += src->found_link - dir_count;
1275         dst->found_size += src->found_size;
1276         if (src->extent_start != (u64)-1) {
1277                 if (dst->extent_start == (u64)-1) {
1278                         dst->extent_start = src->extent_start;
1279                         dst->extent_end = src->extent_end;
1280                 } else {
1281                         if (dst->extent_end > src->extent_start)
1282                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1283                         else if (dst->extent_end < src->extent_start) {
1284                                 ret = add_file_extent_hole(&dst->holes,
1285                                         dst->extent_end,
1286                                         src->extent_start - dst->extent_end);
1287                         }
1288                         if (dst->extent_end < src->extent_end)
1289                                 dst->extent_end = src->extent_end;
1290                 }
1291         }
1292
1293         dst->errors |= src->errors;
1294         if (src->found_inode_item) {
1295                 if (!dst->found_inode_item) {
1296                         dst->nlink = src->nlink;
1297                         dst->isize = src->isize;
1298                         dst->nbytes = src->nbytes;
1299                         dst->imode = src->imode;
1300                         dst->nodatasum = src->nodatasum;
1301                         dst->found_inode_item = 1;
1302                 } else {
1303                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1304                 }
1305         }
1306         dst->merging = 0;
1307
1308         return 0;
1309 }
1310
1311 static int splice_shared_node(struct shared_node *src_node,
1312                               struct shared_node *dst_node)
1313 {
1314         struct cache_extent *cache;
1315         struct ptr_node *node, *ins;
1316         struct cache_tree *src, *dst;
1317         struct inode_record *rec, *conflict;
1318         u64 current_ino = 0;
1319         int splice = 0;
1320         int ret;
1321
1322         if (--src_node->refs == 0)
1323                 splice = 1;
1324         if (src_node->current)
1325                 current_ino = src_node->current->ino;
1326
1327         src = &src_node->root_cache;
1328         dst = &dst_node->root_cache;
1329 again:
1330         cache = search_cache_extent(src, 0);
1331         while (cache) {
1332                 node = container_of(cache, struct ptr_node, cache);
1333                 rec = node->data;
1334                 cache = next_cache_extent(cache);
1335
1336                 if (splice) {
1337                         remove_cache_extent(src, &node->cache);
1338                         ins = node;
1339                 } else {
1340                         ins = malloc(sizeof(*ins));
1341                         BUG_ON(!ins);
1342                         ins->cache.start = node->cache.start;
1343                         ins->cache.size = node->cache.size;
1344                         ins->data = rec;
1345                         rec->refs++;
1346                 }
1347                 ret = insert_cache_extent(dst, &ins->cache);
1348                 if (ret == -EEXIST) {
1349                         conflict = get_inode_rec(dst, rec->ino, 1);
1350                         BUG_ON(IS_ERR(conflict));
1351                         merge_inode_recs(rec, conflict, dst);
1352                         if (rec->checked) {
1353                                 conflict->checked = 1;
1354                                 if (dst_node->current == conflict)
1355                                         dst_node->current = NULL;
1356                         }
1357                         maybe_free_inode_rec(dst, conflict);
1358                         free_inode_rec(rec);
1359                         free(ins);
1360                 } else {
1361                         BUG_ON(ret);
1362                 }
1363         }
1364
1365         if (src == &src_node->root_cache) {
1366                 src = &src_node->inode_cache;
1367                 dst = &dst_node->inode_cache;
1368                 goto again;
1369         }
1370
1371         if (current_ino > 0 && (!dst_node->current ||
1372             current_ino > dst_node->current->ino)) {
1373                 if (dst_node->current) {
1374                         dst_node->current->checked = 1;
1375                         maybe_free_inode_rec(dst, dst_node->current);
1376                 }
1377                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1378                 BUG_ON(IS_ERR(dst_node->current));
1379         }
1380         return 0;
1381 }
1382
1383 static void free_inode_ptr(struct cache_extent *cache)
1384 {
1385         struct ptr_node *node;
1386         struct inode_record *rec;
1387
1388         node = container_of(cache, struct ptr_node, cache);
1389         rec = node->data;
1390         free_inode_rec(rec);
1391         free(node);
1392 }
1393
1394 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1395
1396 static struct shared_node *find_shared_node(struct cache_tree *shared,
1397                                             u64 bytenr)
1398 {
1399         struct cache_extent *cache;
1400         struct shared_node *node;
1401
1402         cache = lookup_cache_extent(shared, bytenr, 1);
1403         if (cache) {
1404                 node = container_of(cache, struct shared_node, cache);
1405                 return node;
1406         }
1407         return NULL;
1408 }
1409
1410 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1411 {
1412         int ret;
1413         struct shared_node *node;
1414
1415         node = calloc(1, sizeof(*node));
1416         if (!node)
1417                 return -ENOMEM;
1418         node->cache.start = bytenr;
1419         node->cache.size = 1;
1420         cache_tree_init(&node->root_cache);
1421         cache_tree_init(&node->inode_cache);
1422         node->refs = refs;
1423
1424         ret = insert_cache_extent(shared, &node->cache);
1425
1426         return ret;
1427 }
1428
1429 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1430                              struct walk_control *wc, int level)
1431 {
1432         struct shared_node *node;
1433         struct shared_node *dest;
1434         int ret;
1435
1436         if (level == wc->active_node)
1437                 return 0;
1438
1439         BUG_ON(wc->active_node <= level);
1440         node = find_shared_node(&wc->shared, bytenr);
1441         if (!node) {
1442                 ret = add_shared_node(&wc->shared, bytenr, refs);
1443                 BUG_ON(ret);
1444                 node = find_shared_node(&wc->shared, bytenr);
1445                 wc->nodes[level] = node;
1446                 wc->active_node = level;
1447                 return 0;
1448         }
1449
1450         if (wc->root_level == wc->active_node &&
1451             btrfs_root_refs(&root->root_item) == 0) {
1452                 if (--node->refs == 0) {
1453                         free_inode_recs_tree(&node->root_cache);
1454                         free_inode_recs_tree(&node->inode_cache);
1455                         remove_cache_extent(&wc->shared, &node->cache);
1456                         free(node);
1457                 }
1458                 return 1;
1459         }
1460
1461         dest = wc->nodes[wc->active_node];
1462         splice_shared_node(node, dest);
1463         if (node->refs == 0) {
1464                 remove_cache_extent(&wc->shared, &node->cache);
1465                 free(node);
1466         }
1467         return 1;
1468 }
1469
1470 static int leave_shared_node(struct btrfs_root *root,
1471                              struct walk_control *wc, int level)
1472 {
1473         struct shared_node *node;
1474         struct shared_node *dest;
1475         int i;
1476
1477         if (level == wc->root_level)
1478                 return 0;
1479
1480         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1481                 if (wc->nodes[i])
1482                         break;
1483         }
1484         BUG_ON(i >= BTRFS_MAX_LEVEL);
1485
1486         node = wc->nodes[wc->active_node];
1487         wc->nodes[wc->active_node] = NULL;
1488         wc->active_node = i;
1489
1490         dest = wc->nodes[wc->active_node];
1491         if (wc->active_node < wc->root_level ||
1492             btrfs_root_refs(&root->root_item) > 0) {
1493                 BUG_ON(node->refs <= 1);
1494                 splice_shared_node(node, dest);
1495         } else {
1496                 BUG_ON(node->refs < 2);
1497                 node->refs--;
1498         }
1499         return 0;
1500 }
1501
1502 /*
1503  * Returns:
1504  * < 0 - on error
1505  * 1   - if the root with id child_root_id is a child of root parent_root_id
1506  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1507  *       has other root(s) as parent(s)
1508  * 2   - if the root child_root_id doesn't have any parent roots
1509  */
1510 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1511                          u64 child_root_id)
1512 {
1513         struct btrfs_path path;
1514         struct btrfs_key key;
1515         struct extent_buffer *leaf;
1516         int has_parent = 0;
1517         int ret;
1518
1519         btrfs_init_path(&path);
1520
1521         key.objectid = parent_root_id;
1522         key.type = BTRFS_ROOT_REF_KEY;
1523         key.offset = child_root_id;
1524         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1525                                 0, 0);
1526         if (ret < 0)
1527                 return ret;
1528         btrfs_release_path(&path);
1529         if (!ret)
1530                 return 1;
1531
1532         key.objectid = child_root_id;
1533         key.type = BTRFS_ROOT_BACKREF_KEY;
1534         key.offset = 0;
1535         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1536                                 0, 0);
1537         if (ret < 0)
1538                 goto out;
1539
1540         while (1) {
1541                 leaf = path.nodes[0];
1542                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1543                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1544                         if (ret)
1545                                 break;
1546                         leaf = path.nodes[0];
1547                 }
1548
1549                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1550                 if (key.objectid != child_root_id ||
1551                     key.type != BTRFS_ROOT_BACKREF_KEY)
1552                         break;
1553
1554                 has_parent = 1;
1555
1556                 if (key.offset == parent_root_id) {
1557                         btrfs_release_path(&path);
1558                         return 1;
1559                 }
1560
1561                 path.slots[0]++;
1562         }
1563 out:
1564         btrfs_release_path(&path);
1565         if (ret < 0)
1566                 return ret;
1567         return has_parent ? 0 : 2;
1568 }
1569
1570 static int process_dir_item(struct extent_buffer *eb,
1571                             int slot, struct btrfs_key *key,
1572                             struct shared_node *active_node)
1573 {
1574         u32 total;
1575         u32 cur = 0;
1576         u32 len;
1577         u32 name_len;
1578         u32 data_len;
1579         int error;
1580         int nritems = 0;
1581         u8 filetype;
1582         struct btrfs_dir_item *di;
1583         struct inode_record *rec;
1584         struct cache_tree *root_cache;
1585         struct cache_tree *inode_cache;
1586         struct btrfs_key location;
1587         char namebuf[BTRFS_NAME_LEN];
1588
1589         root_cache = &active_node->root_cache;
1590         inode_cache = &active_node->inode_cache;
1591         rec = active_node->current;
1592         rec->found_dir_item = 1;
1593
1594         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1595         total = btrfs_item_size_nr(eb, slot);
1596         while (cur < total) {
1597                 nritems++;
1598                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1599                 name_len = btrfs_dir_name_len(eb, di);
1600                 data_len = btrfs_dir_data_len(eb, di);
1601                 filetype = btrfs_dir_type(eb, di);
1602
1603                 rec->found_size += name_len;
1604                 if (cur + sizeof(*di) + name_len > total ||
1605                     name_len > BTRFS_NAME_LEN) {
1606                         error = REF_ERR_NAME_TOO_LONG;
1607
1608                         if (cur + sizeof(*di) > total)
1609                                 break;
1610                         len = min_t(u32, total - cur - sizeof(*di),
1611                                     BTRFS_NAME_LEN);
1612                 } else {
1613                         len = name_len;
1614                         error = 0;
1615                 }
1616
1617                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1618
1619                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1620                     key->offset != btrfs_name_hash(namebuf, len)) {
1621                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1622                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1623                         key->objectid, key->offset, namebuf, len, filetype,
1624                         key->offset, btrfs_name_hash(namebuf, len));
1625                 }
1626
1627                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1628                         add_inode_backref(inode_cache, location.objectid,
1629                                           key->objectid, key->offset, namebuf,
1630                                           len, filetype, key->type, error);
1631                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1632                         add_inode_backref(root_cache, location.objectid,
1633                                           key->objectid, key->offset,
1634                                           namebuf, len, filetype,
1635                                           key->type, error);
1636                 } else {
1637                         fprintf(stderr, "invalid location in dir item %u\n",
1638                                 location.type);
1639                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1640                                           key->objectid, key->offset, namebuf,
1641                                           len, filetype, key->type, error);
1642                 }
1643
1644                 len = sizeof(*di) + name_len + data_len;
1645                 di = (struct btrfs_dir_item *)((char *)di + len);
1646                 cur += len;
1647         }
1648         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1649                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1650
1651         return 0;
1652 }
1653
1654 static int process_inode_ref(struct extent_buffer *eb,
1655                              int slot, struct btrfs_key *key,
1656                              struct shared_node *active_node)
1657 {
1658         u32 total;
1659         u32 cur = 0;
1660         u32 len;
1661         u32 name_len;
1662         u64 index;
1663         int error;
1664         struct cache_tree *inode_cache;
1665         struct btrfs_inode_ref *ref;
1666         char namebuf[BTRFS_NAME_LEN];
1667
1668         inode_cache = &active_node->inode_cache;
1669
1670         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1671         total = btrfs_item_size_nr(eb, slot);
1672         while (cur < total) {
1673                 name_len = btrfs_inode_ref_name_len(eb, ref);
1674                 index = btrfs_inode_ref_index(eb, ref);
1675
1676                 /* inode_ref + namelen should not cross item boundary */
1677                 if (cur + sizeof(*ref) + name_len > total ||
1678                     name_len > BTRFS_NAME_LEN) {
1679                         if (total < cur + sizeof(*ref))
1680                                 break;
1681
1682                         /* Still try to read out the remaining part */
1683                         len = min_t(u32, total - cur - sizeof(*ref),
1684                                     BTRFS_NAME_LEN);
1685                         error = REF_ERR_NAME_TOO_LONG;
1686                 } else {
1687                         len = name_len;
1688                         error = 0;
1689                 }
1690
1691                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1692                 add_inode_backref(inode_cache, key->objectid, key->offset,
1693                                   index, namebuf, len, 0, key->type, error);
1694
1695                 len = sizeof(*ref) + name_len;
1696                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1697                 cur += len;
1698         }
1699         return 0;
1700 }
1701
1702 static int process_inode_extref(struct extent_buffer *eb,
1703                                 int slot, struct btrfs_key *key,
1704                                 struct shared_node *active_node)
1705 {
1706         u32 total;
1707         u32 cur = 0;
1708         u32 len;
1709         u32 name_len;
1710         u64 index;
1711         u64 parent;
1712         int error;
1713         struct cache_tree *inode_cache;
1714         struct btrfs_inode_extref *extref;
1715         char namebuf[BTRFS_NAME_LEN];
1716
1717         inode_cache = &active_node->inode_cache;
1718
1719         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1720         total = btrfs_item_size_nr(eb, slot);
1721         while (cur < total) {
1722                 name_len = btrfs_inode_extref_name_len(eb, extref);
1723                 index = btrfs_inode_extref_index(eb, extref);
1724                 parent = btrfs_inode_extref_parent(eb, extref);
1725                 if (name_len <= BTRFS_NAME_LEN) {
1726                         len = name_len;
1727                         error = 0;
1728                 } else {
1729                         len = BTRFS_NAME_LEN;
1730                         error = REF_ERR_NAME_TOO_LONG;
1731                 }
1732                 read_extent_buffer(eb, namebuf,
1733                                    (unsigned long)(extref + 1), len);
1734                 add_inode_backref(inode_cache, key->objectid, parent,
1735                                   index, namebuf, len, 0, key->type, error);
1736
1737                 len = sizeof(*extref) + name_len;
1738                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1739                 cur += len;
1740         }
1741         return 0;
1742
1743 }
1744
1745 static int count_csum_range(struct btrfs_root *root, u64 start,
1746                             u64 len, u64 *found)
1747 {
1748         struct btrfs_key key;
1749         struct btrfs_path path;
1750         struct extent_buffer *leaf;
1751         int ret;
1752         size_t size;
1753         *found = 0;
1754         u64 csum_end;
1755         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1756
1757         btrfs_init_path(&path);
1758
1759         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1760         key.offset = start;
1761         key.type = BTRFS_EXTENT_CSUM_KEY;
1762
1763         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1764                                 &key, &path, 0, 0);
1765         if (ret < 0)
1766                 goto out;
1767         if (ret > 0 && path.slots[0] > 0) {
1768                 leaf = path.nodes[0];
1769                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1770                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1771                     key.type == BTRFS_EXTENT_CSUM_KEY)
1772                         path.slots[0]--;
1773         }
1774
1775         while (len > 0) {
1776                 leaf = path.nodes[0];
1777                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1778                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1779                         if (ret > 0)
1780                                 break;
1781                         else if (ret < 0)
1782                                 goto out;
1783                         leaf = path.nodes[0];
1784                 }
1785
1786                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1787                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1788                     key.type != BTRFS_EXTENT_CSUM_KEY)
1789                         break;
1790
1791                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1792                 if (key.offset >= start + len)
1793                         break;
1794
1795                 if (key.offset > start)
1796                         start = key.offset;
1797
1798                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1799                 csum_end = key.offset + (size / csum_size) *
1800                            root->fs_info->sectorsize;
1801                 if (csum_end > start) {
1802                         size = min(csum_end - start, len);
1803                         len -= size;
1804                         start += size;
1805                         *found += size;
1806                 }
1807
1808                 path.slots[0]++;
1809         }
1810 out:
1811         btrfs_release_path(&path);
1812         if (ret < 0)
1813                 return ret;
1814         return 0;
1815 }
1816
1817 static int process_file_extent(struct btrfs_root *root,
1818                                 struct extent_buffer *eb,
1819                                 int slot, struct btrfs_key *key,
1820                                 struct shared_node *active_node)
1821 {
1822         struct inode_record *rec;
1823         struct btrfs_file_extent_item *fi;
1824         u64 num_bytes = 0;
1825         u64 disk_bytenr = 0;
1826         u64 extent_offset = 0;
1827         u64 mask = root->fs_info->sectorsize - 1;
1828         int extent_type;
1829         int ret;
1830
1831         rec = active_node->current;
1832         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1833         rec->found_file_extent = 1;
1834
1835         if (rec->extent_start == (u64)-1) {
1836                 rec->extent_start = key->offset;
1837                 rec->extent_end = key->offset;
1838         }
1839
1840         if (rec->extent_end > key->offset)
1841                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1842         else if (rec->extent_end < key->offset) {
1843                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1844                                            key->offset - rec->extent_end);
1845                 if (ret < 0)
1846                         return ret;
1847         }
1848
1849         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1850         extent_type = btrfs_file_extent_type(eb, fi);
1851
1852         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1853                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1854                 if (num_bytes == 0)
1855                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1856                 rec->found_size += num_bytes;
1857                 num_bytes = (num_bytes + mask) & ~mask;
1858         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1859                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1860                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1861                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1862                 extent_offset = btrfs_file_extent_offset(eb, fi);
1863                 if (num_bytes == 0 || (num_bytes & mask))
1864                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1865                 if (num_bytes + extent_offset >
1866                     btrfs_file_extent_ram_bytes(eb, fi))
1867                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1868                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1869                     (btrfs_file_extent_compression(eb, fi) ||
1870                      btrfs_file_extent_encryption(eb, fi) ||
1871                      btrfs_file_extent_other_encoding(eb, fi)))
1872                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1873                 if (disk_bytenr > 0)
1874                         rec->found_size += num_bytes;
1875         } else {
1876                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1877         }
1878         rec->extent_end = key->offset + num_bytes;
1879
1880         /*
1881          * The data reloc tree will copy full extents into its inode and then
1882          * copy the corresponding csums.  Because the extent it copied could be
1883          * a preallocated extent that hasn't been written to yet there may be no
1884          * csums to copy, ergo we won't have csums for our file extent.  This is
1885          * ok so just don't bother checking csums if the inode belongs to the
1886          * data reloc tree.
1887          */
1888         if (disk_bytenr > 0 &&
1889             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1890                 u64 found;
1891                 if (btrfs_file_extent_compression(eb, fi))
1892                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1893                 else
1894                         disk_bytenr += extent_offset;
1895
1896                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1897                 if (ret < 0)
1898                         return ret;
1899                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1900                         if (found > 0)
1901                                 rec->found_csum_item = 1;
1902                         if (found < num_bytes)
1903                                 rec->some_csum_missing = 1;
1904                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1905                         if (found > 0)
1906                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1907                 }
1908         }
1909         return 0;
1910 }
1911
1912 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1913                             struct walk_control *wc)
1914 {
1915         struct btrfs_key key;
1916         u32 nritems;
1917         int i;
1918         int ret = 0;
1919         struct cache_tree *inode_cache;
1920         struct shared_node *active_node;
1921
1922         if (wc->root_level == wc->active_node &&
1923             btrfs_root_refs(&root->root_item) == 0)
1924                 return 0;
1925
1926         active_node = wc->nodes[wc->active_node];
1927         inode_cache = &active_node->inode_cache;
1928         nritems = btrfs_header_nritems(eb);
1929         for (i = 0; i < nritems; i++) {
1930                 btrfs_item_key_to_cpu(eb, &key, i);
1931
1932                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1933                         continue;
1934                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1935                         continue;
1936
1937                 if (active_node->current == NULL ||
1938                     active_node->current->ino < key.objectid) {
1939                         if (active_node->current) {
1940                                 active_node->current->checked = 1;
1941                                 maybe_free_inode_rec(inode_cache,
1942                                                      active_node->current);
1943                         }
1944                         active_node->current = get_inode_rec(inode_cache,
1945                                                              key.objectid, 1);
1946                         BUG_ON(IS_ERR(active_node->current));
1947                 }
1948                 switch (key.type) {
1949                 case BTRFS_DIR_ITEM_KEY:
1950                 case BTRFS_DIR_INDEX_KEY:
1951                         ret = process_dir_item(eb, i, &key, active_node);
1952                         break;
1953                 case BTRFS_INODE_REF_KEY:
1954                         ret = process_inode_ref(eb, i, &key, active_node);
1955                         break;
1956                 case BTRFS_INODE_EXTREF_KEY:
1957                         ret = process_inode_extref(eb, i, &key, active_node);
1958                         break;
1959                 case BTRFS_INODE_ITEM_KEY:
1960                         ret = process_inode_item(eb, i, &key, active_node);
1961                         break;
1962                 case BTRFS_EXTENT_DATA_KEY:
1963                         ret = process_file_extent(root, eb, i, &key,
1964                                                   active_node);
1965                         break;
1966                 default:
1967                         break;
1968                 };
1969         }
1970         return ret;
1971 }
1972
1973 struct node_refs {
1974         u64 bytenr[BTRFS_MAX_LEVEL];
1975         u64 refs[BTRFS_MAX_LEVEL];
1976         int need_check[BTRFS_MAX_LEVEL];
1977 };
1978
1979 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1980                              struct node_refs *nrefs, u64 level);
1981 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1982                             unsigned int ext_ref);
1983
1984 /*
1985  * Returns >0  Found error, not fatal, should continue
1986  * Returns <0  Fatal error, must exit the whole check
1987  * Returns 0   No errors found
1988  */
1989 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1990                                struct node_refs *nrefs, int *level, int ext_ref)
1991 {
1992         struct extent_buffer *cur = path->nodes[0];
1993         struct btrfs_key key;
1994         u64 cur_bytenr;
1995         u32 nritems;
1996         u64 first_ino = 0;
1997         int root_level = btrfs_header_level(root->node);
1998         int i;
1999         int ret = 0; /* Final return value */
2000         int err = 0; /* Positive error bitmap */
2001
2002         cur_bytenr = cur->start;
2003
2004         /* skip to first inode item or the first inode number change */
2005         nritems = btrfs_header_nritems(cur);
2006         for (i = 0; i < nritems; i++) {
2007                 btrfs_item_key_to_cpu(cur, &key, i);
2008                 if (i == 0)
2009                         first_ino = key.objectid;
2010                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2011                     (first_ino && first_ino != key.objectid))
2012                         break;
2013         }
2014         if (i == nritems) {
2015                 path->slots[0] = nritems;
2016                 return 0;
2017         }
2018         path->slots[0] = i;
2019
2020 again:
2021         err |= check_inode_item(root, path, ext_ref);
2022
2023         /* modify cur since check_inode_item may change path */
2024         cur = path->nodes[0];
2025
2026         if (err & LAST_ITEM)
2027                 goto out;
2028
2029         /* still have inode items in thie leaf */
2030         if (cur->start == cur_bytenr)
2031                 goto again;
2032
2033         /*
2034          * we have switched to another leaf, above nodes may
2035          * have changed, here walk down the path, if a node
2036          * or leaf is shared, check whether we can skip this
2037          * node or leaf.
2038          */
2039         for (i = root_level; i >= 0; i--) {
2040                 if (path->nodes[i]->start == nrefs->bytenr[i])
2041                         continue;
2042
2043                 ret = update_nodes_refs(root,
2044                                 path->nodes[i]->start,
2045                                 nrefs, i);
2046                 if (ret)
2047                         goto out;
2048
2049                 if (!nrefs->need_check[i]) {
2050                         *level += 1;
2051                         break;
2052                 }
2053         }
2054
2055         for (i = 0; i < *level; i++) {
2056                 free_extent_buffer(path->nodes[i]);
2057                 path->nodes[i] = NULL;
2058         }
2059 out:
2060         err &= ~LAST_ITEM;
2061         if (err && !ret)
2062                 ret = err;
2063         return ret;
2064 }
2065
2066 static void reada_walk_down(struct btrfs_root *root,
2067                             struct extent_buffer *node, int slot)
2068 {
2069         struct btrfs_fs_info *fs_info = root->fs_info;
2070         u64 bytenr;
2071         u64 ptr_gen;
2072         u32 nritems;
2073         int i;
2074         int level;
2075
2076         level = btrfs_header_level(node);
2077         if (level != 1)
2078                 return;
2079
2080         nritems = btrfs_header_nritems(node);
2081         for (i = slot; i < nritems; i++) {
2082                 bytenr = btrfs_node_blockptr(node, i);
2083                 ptr_gen = btrfs_node_ptr_generation(node, i);
2084                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2085         }
2086 }
2087
2088 /*
2089  * Check the child node/leaf by the following condition:
2090  * 1. the first item key of the node/leaf should be the same with the one
2091  *    in parent.
2092  * 2. block in parent node should match the child node/leaf.
2093  * 3. generation of parent node and child's header should be consistent.
2094  *
2095  * Or the child node/leaf pointed by the key in parent is not valid.
2096  *
2097  * We hope to check leaf owner too, but since subvol may share leaves,
2098  * which makes leaf owner check not so strong, key check should be
2099  * sufficient enough for that case.
2100  */
2101 static int check_child_node(struct extent_buffer *parent, int slot,
2102                             struct extent_buffer *child)
2103 {
2104         struct btrfs_key parent_key;
2105         struct btrfs_key child_key;
2106         int ret = 0;
2107
2108         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2109         if (btrfs_header_level(child) == 0)
2110                 btrfs_item_key_to_cpu(child, &child_key, 0);
2111         else
2112                 btrfs_node_key_to_cpu(child, &child_key, 0);
2113
2114         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2115                 ret = -EINVAL;
2116                 fprintf(stderr,
2117                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2118                         parent_key.objectid, parent_key.type, parent_key.offset,
2119                         child_key.objectid, child_key.type, child_key.offset);
2120         }
2121         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2122                 ret = -EINVAL;
2123                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2124                         btrfs_node_blockptr(parent, slot),
2125                         btrfs_header_bytenr(child));
2126         }
2127         if (btrfs_node_ptr_generation(parent, slot) !=
2128             btrfs_header_generation(child)) {
2129                 ret = -EINVAL;
2130                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2131                         btrfs_header_generation(child),
2132                         btrfs_node_ptr_generation(parent, slot));
2133         }
2134         return ret;
2135 }
2136
2137 /*
2138  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2139  * in every fs or file tree check. Here we find its all root ids, and only check
2140  * it in the fs or file tree which has the smallest root id.
2141  */
2142 static int need_check(struct btrfs_root *root, struct ulist *roots)
2143 {
2144         struct rb_node *node;
2145         struct ulist_node *u;
2146
2147         if (roots->nnodes == 1)
2148                 return 1;
2149
2150         node = rb_first(&roots->root);
2151         u = rb_entry(node, struct ulist_node, rb_node);
2152         /*
2153          * current root id is not smallest, we skip it and let it be checked
2154          * in the fs or file tree who hash the smallest root id.
2155          */
2156         if (root->objectid != u->val)
2157                 return 0;
2158
2159         return 1;
2160 }
2161
2162 /*
2163  * for a tree node or leaf, we record its reference count, so later if we still
2164  * process this node or leaf, don't need to compute its reference count again.
2165  */
2166 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2167                              struct node_refs *nrefs, u64 level)
2168 {
2169         int check, ret;
2170         u64 refs;
2171         struct ulist *roots;
2172
2173         if (nrefs->bytenr[level] != bytenr) {
2174                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2175                                        level, 1, &refs, NULL);
2176                 if (ret < 0)
2177                         return ret;
2178
2179                 nrefs->bytenr[level] = bytenr;
2180                 nrefs->refs[level] = refs;
2181                 if (refs > 1) {
2182                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2183                                                    0, &roots);
2184                         if (ret)
2185                                 return -EIO;
2186
2187                         check = need_check(root, roots);
2188                         ulist_free(roots);
2189                         nrefs->need_check[level] = check;
2190                 } else {
2191                         nrefs->need_check[level] = 1;
2192                 }
2193         }
2194
2195         return 0;
2196 }
2197
2198 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2199                           struct walk_control *wc, int *level,
2200                           struct node_refs *nrefs)
2201 {
2202         enum btrfs_tree_block_status status;
2203         u64 bytenr;
2204         u64 ptr_gen;
2205         struct btrfs_fs_info *fs_info = root->fs_info;
2206         struct extent_buffer *next;
2207         struct extent_buffer *cur;
2208         int ret, err = 0;
2209         u64 refs;
2210
2211         WARN_ON(*level < 0);
2212         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2213
2214         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2215                 refs = nrefs->refs[*level];
2216                 ret = 0;
2217         } else {
2218                 ret = btrfs_lookup_extent_info(NULL, root,
2219                                        path->nodes[*level]->start,
2220                                        *level, 1, &refs, NULL);
2221                 if (ret < 0) {
2222                         err = ret;
2223                         goto out;
2224                 }
2225                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2226                 nrefs->refs[*level] = refs;
2227         }
2228
2229         if (refs > 1) {
2230                 ret = enter_shared_node(root, path->nodes[*level]->start,
2231                                         refs, wc, *level);
2232                 if (ret > 0) {
2233                         err = ret;
2234                         goto out;
2235                 }
2236         }
2237
2238         while (*level >= 0) {
2239                 WARN_ON(*level < 0);
2240                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2241                 cur = path->nodes[*level];
2242
2243                 if (btrfs_header_level(cur) != *level)
2244                         WARN_ON(1);
2245
2246                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2247                         break;
2248                 if (*level == 0) {
2249                         ret = process_one_leaf(root, cur, wc);
2250                         if (ret < 0)
2251                                 err = ret;
2252                         break;
2253                 }
2254                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2255                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2256
2257                 if (bytenr == nrefs->bytenr[*level - 1]) {
2258                         refs = nrefs->refs[*level - 1];
2259                 } else {
2260                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2261                                         *level - 1, 1, &refs, NULL);
2262                         if (ret < 0) {
2263                                 refs = 0;
2264                         } else {
2265                                 nrefs->bytenr[*level - 1] = bytenr;
2266                                 nrefs->refs[*level - 1] = refs;
2267                         }
2268                 }
2269
2270                 if (refs > 1) {
2271                         ret = enter_shared_node(root, bytenr, refs,
2272                                                 wc, *level - 1);
2273                         if (ret > 0) {
2274                                 path->slots[*level]++;
2275                                 continue;
2276                         }
2277                 }
2278
2279                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2280                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281                         free_extent_buffer(next);
2282                         reada_walk_down(root, cur, path->slots[*level]);
2283                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2284                         if (!extent_buffer_uptodate(next)) {
2285                                 struct btrfs_key node_key;
2286
2287                                 btrfs_node_key_to_cpu(path->nodes[*level],
2288                                                       &node_key,
2289                                                       path->slots[*level]);
2290                                 btrfs_add_corrupt_extent_record(root->fs_info,
2291                                                 &node_key,
2292                                                 path->nodes[*level]->start,
2293                                                 root->fs_info->nodesize,
2294                                                 *level);
2295                                 err = -EIO;
2296                                 goto out;
2297                         }
2298                 }
2299
2300                 ret = check_child_node(cur, path->slots[*level], next);
2301                 if (ret) {
2302                         free_extent_buffer(next);
2303                         err = ret;
2304                         goto out;
2305                 }
2306
2307                 if (btrfs_is_leaf(next))
2308                         status = btrfs_check_leaf(root, NULL, next);
2309                 else
2310                         status = btrfs_check_node(root, NULL, next);
2311                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2312                         free_extent_buffer(next);
2313                         err = -EIO;
2314                         goto out;
2315                 }
2316
2317                 *level = *level - 1;
2318                 free_extent_buffer(path->nodes[*level]);
2319                 path->nodes[*level] = next;
2320                 path->slots[*level] = 0;
2321         }
2322 out:
2323         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2324         return err;
2325 }
2326
2327 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2328                             unsigned int ext_ref);
2329
2330 /*
2331  * Returns >0  Found error, should continue
2332  * Returns <0  Fatal error, must exit the whole check
2333  * Returns 0   No errors found
2334  */
2335 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2336                              int *level, struct node_refs *nrefs, int ext_ref)
2337 {
2338         enum btrfs_tree_block_status status;
2339         u64 bytenr;
2340         u64 ptr_gen;
2341         struct btrfs_fs_info *fs_info = root->fs_info;
2342         struct extent_buffer *next;
2343         struct extent_buffer *cur;
2344         int ret;
2345
2346         WARN_ON(*level < 0);
2347         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2348
2349         ret = update_nodes_refs(root, path->nodes[*level]->start,
2350                                 nrefs, *level);
2351         if (ret < 0)
2352                 return ret;
2353
2354         while (*level >= 0) {
2355                 WARN_ON(*level < 0);
2356                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2357                 cur = path->nodes[*level];
2358
2359                 if (btrfs_header_level(cur) != *level)
2360                         WARN_ON(1);
2361
2362                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2363                         break;
2364                 /* Don't forgot to check leaf/node validation */
2365                 if (*level == 0) {
2366                         ret = btrfs_check_leaf(root, NULL, cur);
2367                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2368                                 ret = -EIO;
2369                                 break;
2370                         }
2371                         ret = process_one_leaf_v2(root, path, nrefs,
2372                                                   level, ext_ref);
2373                         cur = path->nodes[*level];
2374                         break;
2375                 } else {
2376                         ret = btrfs_check_node(root, NULL, cur);
2377                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2378                                 ret = -EIO;
2379                                 break;
2380                         }
2381                 }
2382                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2383                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2384
2385                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2386                 if (ret)
2387                         break;
2388                 if (!nrefs->need_check[*level - 1]) {
2389                         path->slots[*level]++;
2390                         continue;
2391                 }
2392
2393                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2394                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2395                         free_extent_buffer(next);
2396                         reada_walk_down(root, cur, path->slots[*level]);
2397                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2398                         if (!extent_buffer_uptodate(next)) {
2399                                 struct btrfs_key node_key;
2400
2401                                 btrfs_node_key_to_cpu(path->nodes[*level],
2402                                                       &node_key,
2403                                                       path->slots[*level]);
2404                                 btrfs_add_corrupt_extent_record(fs_info,
2405                                                 &node_key,
2406                                                 path->nodes[*level]->start,
2407                                                 fs_info->nodesize,
2408                                                 *level);
2409                                 ret = -EIO;
2410                                 break;
2411                         }
2412                 }
2413
2414                 ret = check_child_node(cur, path->slots[*level], next);
2415                 if (ret < 0) 
2416                         break;
2417
2418                 if (btrfs_is_leaf(next))
2419                         status = btrfs_check_leaf(root, NULL, next);
2420                 else
2421                         status = btrfs_check_node(root, NULL, next);
2422                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2423                         free_extent_buffer(next);
2424                         ret = -EIO;
2425                         break;
2426                 }
2427
2428                 *level = *level - 1;
2429                 free_extent_buffer(path->nodes[*level]);
2430                 path->nodes[*level] = next;
2431                 path->slots[*level] = 0;
2432         }
2433         return ret;
2434 }
2435
2436 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2437                         struct walk_control *wc, int *level)
2438 {
2439         int i;
2440         struct extent_buffer *leaf;
2441
2442         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2443                 leaf = path->nodes[i];
2444                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2445                         path->slots[i]++;
2446                         *level = i;
2447                         return 0;
2448                 } else {
2449                         free_extent_buffer(path->nodes[*level]);
2450                         path->nodes[*level] = NULL;
2451                         BUG_ON(*level > wc->active_node);
2452                         if (*level == wc->active_node)
2453                                 leave_shared_node(root, wc, *level);
2454                         *level = i + 1;
2455                 }
2456         }
2457         return 1;
2458 }
2459
2460 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2461                            int *level)
2462 {
2463         int i;
2464         struct extent_buffer *leaf;
2465
2466         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2467                 leaf = path->nodes[i];
2468                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2469                         path->slots[i]++;
2470                         *level = i;
2471                         return 0;
2472                 } else {
2473                         free_extent_buffer(path->nodes[*level]);
2474                         path->nodes[*level] = NULL;
2475                         *level = i + 1;
2476                 }
2477         }
2478         return 1;
2479 }
2480
2481 static int check_root_dir(struct inode_record *rec)
2482 {
2483         struct inode_backref *backref;
2484         int ret = -1;
2485
2486         if (!rec->found_inode_item || rec->errors)
2487                 goto out;
2488         if (rec->nlink != 1 || rec->found_link != 0)
2489                 goto out;
2490         if (list_empty(&rec->backrefs))
2491                 goto out;
2492         backref = to_inode_backref(rec->backrefs.next);
2493         if (!backref->found_inode_ref)
2494                 goto out;
2495         if (backref->index != 0 || backref->namelen != 2 ||
2496             memcmp(backref->name, "..", 2))
2497                 goto out;
2498         if (backref->found_dir_index || backref->found_dir_item)
2499                 goto out;
2500         ret = 0;
2501 out:
2502         return ret;
2503 }
2504
2505 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2506                               struct btrfs_root *root, struct btrfs_path *path,
2507                               struct inode_record *rec)
2508 {
2509         struct btrfs_inode_item *ei;
2510         struct btrfs_key key;
2511         int ret;
2512
2513         key.objectid = rec->ino;
2514         key.type = BTRFS_INODE_ITEM_KEY;
2515         key.offset = (u64)-1;
2516
2517         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2518         if (ret < 0)
2519                 goto out;
2520         if (ret) {
2521                 if (!path->slots[0]) {
2522                         ret = -ENOENT;
2523                         goto out;
2524                 }
2525                 path->slots[0]--;
2526                 ret = 0;
2527         }
2528         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2529         if (key.objectid != rec->ino) {
2530                 ret = -ENOENT;
2531                 goto out;
2532         }
2533
2534         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2535                             struct btrfs_inode_item);
2536         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2537         btrfs_mark_buffer_dirty(path->nodes[0]);
2538         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2539         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2540                root->root_key.objectid);
2541 out:
2542         btrfs_release_path(path);
2543         return ret;
2544 }
2545
2546 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2547                                     struct btrfs_root *root,
2548                                     struct btrfs_path *path,
2549                                     struct inode_record *rec)
2550 {
2551         int ret;
2552
2553         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2554         btrfs_release_path(path);
2555         if (!ret)
2556                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2557         return ret;
2558 }
2559
2560 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2561                                struct btrfs_root *root,
2562                                struct btrfs_path *path,
2563                                struct inode_record *rec)
2564 {
2565         struct btrfs_inode_item *ei;
2566         struct btrfs_key key;
2567         int ret = 0;
2568
2569         key.objectid = rec->ino;
2570         key.type = BTRFS_INODE_ITEM_KEY;
2571         key.offset = 0;
2572
2573         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2574         if (ret) {
2575                 if (ret > 0)
2576                         ret = -ENOENT;
2577                 goto out;
2578         }
2579
2580         /* Since ret == 0, no need to check anything */
2581         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2582                             struct btrfs_inode_item);
2583         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2584         btrfs_mark_buffer_dirty(path->nodes[0]);
2585         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2586         printf("reset nbytes for ino %llu root %llu\n",
2587                rec->ino, root->root_key.objectid);
2588 out:
2589         btrfs_release_path(path);
2590         return ret;
2591 }
2592
2593 static int add_missing_dir_index(struct btrfs_root *root,
2594                                  struct cache_tree *inode_cache,
2595                                  struct inode_record *rec,
2596                                  struct inode_backref *backref)
2597 {
2598         struct btrfs_path path;
2599         struct btrfs_trans_handle *trans;
2600         struct btrfs_dir_item *dir_item;
2601         struct extent_buffer *leaf;
2602         struct btrfs_key key;
2603         struct btrfs_disk_key disk_key;
2604         struct inode_record *dir_rec;
2605         unsigned long name_ptr;
2606         u32 data_size = sizeof(*dir_item) + backref->namelen;
2607         int ret;
2608
2609         trans = btrfs_start_transaction(root, 1);
2610         if (IS_ERR(trans))
2611                 return PTR_ERR(trans);
2612
2613         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2614                 (unsigned long long)rec->ino);
2615
2616         btrfs_init_path(&path);
2617         key.objectid = backref->dir;
2618         key.type = BTRFS_DIR_INDEX_KEY;
2619         key.offset = backref->index;
2620         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2621         BUG_ON(ret);
2622
2623         leaf = path.nodes[0];
2624         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2625
2626         disk_key.objectid = cpu_to_le64(rec->ino);
2627         disk_key.type = BTRFS_INODE_ITEM_KEY;
2628         disk_key.offset = 0;
2629
2630         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2631         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2632         btrfs_set_dir_data_len(leaf, dir_item, 0);
2633         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2634         name_ptr = (unsigned long)(dir_item + 1);
2635         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2636         btrfs_mark_buffer_dirty(leaf);
2637         btrfs_release_path(&path);
2638         btrfs_commit_transaction(trans, root);
2639
2640         backref->found_dir_index = 1;
2641         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2642         BUG_ON(IS_ERR(dir_rec));
2643         if (!dir_rec)
2644                 return 0;
2645         dir_rec->found_size += backref->namelen;
2646         if (dir_rec->found_size == dir_rec->isize &&
2647             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2648                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2649         if (dir_rec->found_size != dir_rec->isize)
2650                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2651
2652         return 0;
2653 }
2654
2655 static int delete_dir_index(struct btrfs_root *root,
2656                             struct inode_backref *backref)
2657 {
2658         struct btrfs_trans_handle *trans;
2659         struct btrfs_dir_item *di;
2660         struct btrfs_path path;
2661         int ret = 0;
2662
2663         trans = btrfs_start_transaction(root, 1);
2664         if (IS_ERR(trans))
2665                 return PTR_ERR(trans);
2666
2667         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2668                 (unsigned long long)backref->dir,
2669                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2670                 (unsigned long long)root->objectid);
2671
2672         btrfs_init_path(&path);
2673         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2674                                     backref->name, backref->namelen,
2675                                     backref->index, -1);
2676         if (IS_ERR(di)) {
2677                 ret = PTR_ERR(di);
2678                 btrfs_release_path(&path);
2679                 btrfs_commit_transaction(trans, root);
2680                 if (ret == -ENOENT)
2681                         return 0;
2682                 return ret;
2683         }
2684
2685         if (!di)
2686                 ret = btrfs_del_item(trans, root, &path);
2687         else
2688                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2689         BUG_ON(ret);
2690         btrfs_release_path(&path);
2691         btrfs_commit_transaction(trans, root);
2692         return ret;
2693 }
2694
2695 static int __create_inode_item(struct btrfs_trans_handle *trans,
2696                                struct btrfs_root *root, u64 ino, u64 size,
2697                                u64 nbytes, u64 nlink, u32 mode)
2698 {
2699         struct btrfs_inode_item ii;
2700         time_t now = time(NULL);
2701         int ret;
2702
2703         btrfs_set_stack_inode_size(&ii, size);
2704         btrfs_set_stack_inode_nbytes(&ii, nbytes);
2705         btrfs_set_stack_inode_nlink(&ii, nlink);
2706         btrfs_set_stack_inode_mode(&ii, mode);
2707         btrfs_set_stack_inode_generation(&ii, trans->transid);
2708         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2709         btrfs_set_stack_timespec_sec(&ii.ctime, now);
2710         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2711         btrfs_set_stack_timespec_sec(&ii.mtime, now);
2712         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2713         btrfs_set_stack_timespec_sec(&ii.otime, 0);
2714         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2715
2716         ret = btrfs_insert_inode(trans, root, ino, &ii);
2717         ASSERT(!ret);
2718
2719         warning("root %llu inode %llu recreating inode item, this may "
2720                 "be incomplete, please check permissions and content after "
2721                 "the fsck completes.\n", (unsigned long long)root->objectid,
2722                 (unsigned long long)ino);
2723
2724         return 0;
2725 }
2726
2727 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2728                                     struct btrfs_root *root, u64 ino,
2729                                     u8 filetype)
2730 {
2731         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2732
2733         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
2734 }
2735
2736 static int create_inode_item(struct btrfs_root *root,
2737                              struct inode_record *rec, int root_dir)
2738 {
2739         struct btrfs_trans_handle *trans;
2740         u64 nlink = 0;
2741         u32 mode = 0;
2742         u64 size = 0;
2743         int ret;
2744
2745         trans = btrfs_start_transaction(root, 1);
2746         if (IS_ERR(trans)) {
2747                 ret = PTR_ERR(trans);
2748                 return ret;
2749         }
2750
2751         nlink = root_dir ? 1 : rec->found_link;
2752         if (rec->found_dir_item) {
2753                 if (rec->found_file_extent)
2754                         fprintf(stderr, "root %llu inode %llu has both a dir "
2755                                 "item and extents, unsure if it is a dir or a "
2756                                 "regular file so setting it as a directory\n",
2757                                 (unsigned long long)root->objectid,
2758                                 (unsigned long long)rec->ino);
2759                 mode = S_IFDIR | 0755;
2760                 size = rec->found_size;
2761         } else if (!rec->found_dir_item) {
2762                 size = rec->extent_end;
2763                 mode =  S_IFREG | 0755;
2764         }
2765
2766         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2767                                   nlink, mode);
2768         btrfs_commit_transaction(trans, root);
2769         return 0;
2770 }
2771
2772 static int repair_inode_backrefs(struct btrfs_root *root,
2773                                  struct inode_record *rec,
2774                                  struct cache_tree *inode_cache,
2775                                  int delete)
2776 {
2777         struct inode_backref *tmp, *backref;
2778         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2779         int ret = 0;
2780         int repaired = 0;
2781
2782         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2783                 if (!delete && rec->ino == root_dirid) {
2784                         if (!rec->found_inode_item) {
2785                                 ret = create_inode_item(root, rec, 1);
2786                                 if (ret)
2787                                         break;
2788                                 repaired++;
2789                         }
2790                 }
2791
2792                 /* Index 0 for root dir's are special, don't mess with it */
2793                 if (rec->ino == root_dirid && backref->index == 0)
2794                         continue;
2795
2796                 if (delete &&
2797                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2798                      (backref->found_dir_index && backref->found_inode_ref &&
2799                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2800                         ret = delete_dir_index(root, backref);
2801                         if (ret)
2802                                 break;
2803                         repaired++;
2804                         list_del(&backref->list);
2805                         free(backref);
2806                         continue;
2807                 }
2808
2809                 if (!delete && !backref->found_dir_index &&
2810                     backref->found_dir_item && backref->found_inode_ref) {
2811                         ret = add_missing_dir_index(root, inode_cache, rec,
2812                                                     backref);
2813                         if (ret)
2814                                 break;
2815                         repaired++;
2816                         if (backref->found_dir_item &&
2817                             backref->found_dir_index) {
2818                                 if (!backref->errors &&
2819                                     backref->found_inode_ref) {
2820                                         list_del(&backref->list);
2821                                         free(backref);
2822                                         continue;
2823                                 }
2824                         }
2825                 }
2826
2827                 if (!delete && (!backref->found_dir_index &&
2828                                 !backref->found_dir_item &&
2829                                 backref->found_inode_ref)) {
2830                         struct btrfs_trans_handle *trans;
2831                         struct btrfs_key location;
2832
2833                         ret = check_dir_conflict(root, backref->name,
2834                                                  backref->namelen,
2835                                                  backref->dir,
2836                                                  backref->index);
2837                         if (ret) {
2838                                 /*
2839                                  * let nlink fixing routine to handle it,
2840                                  * which can do it better.
2841                                  */
2842                                 ret = 0;
2843                                 break;
2844                         }
2845                         location.objectid = rec->ino;
2846                         location.type = BTRFS_INODE_ITEM_KEY;
2847                         location.offset = 0;
2848
2849                         trans = btrfs_start_transaction(root, 1);
2850                         if (IS_ERR(trans)) {
2851                                 ret = PTR_ERR(trans);
2852                                 break;
2853                         }
2854                         fprintf(stderr, "adding missing dir index/item pair "
2855                                 "for inode %llu\n",
2856                                 (unsigned long long)rec->ino);
2857                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2858                                                     backref->namelen,
2859                                                     backref->dir, &location,
2860                                                     imode_to_type(rec->imode),
2861                                                     backref->index);
2862                         BUG_ON(ret);
2863                         btrfs_commit_transaction(trans, root);
2864                         repaired++;
2865                 }
2866
2867                 if (!delete && (backref->found_inode_ref &&
2868                                 backref->found_dir_index &&
2869                                 backref->found_dir_item &&
2870                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2871                                 !rec->found_inode_item)) {
2872                         ret = create_inode_item(root, rec, 0);
2873                         if (ret)
2874                                 break;
2875                         repaired++;
2876                 }
2877
2878         }
2879         return ret ? ret : repaired;
2880 }
2881
2882 /*
2883  * To determine the file type for nlink/inode_item repair
2884  *
2885  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2886  * Return -ENOENT if file type is not found.
2887  */
2888 static int find_file_type(struct inode_record *rec, u8 *type)
2889 {
2890         struct inode_backref *backref;
2891
2892         /* For inode item recovered case */
2893         if (rec->found_inode_item) {
2894                 *type = imode_to_type(rec->imode);
2895                 return 0;
2896         }
2897
2898         list_for_each_entry(backref, &rec->backrefs, list) {
2899                 if (backref->found_dir_index || backref->found_dir_item) {
2900                         *type = backref->filetype;
2901                         return 0;
2902                 }
2903         }
2904         return -ENOENT;
2905 }
2906
2907 /*
2908  * To determine the file name for nlink repair
2909  *
2910  * Return 0 if file name is found, set name and namelen.
2911  * Return -ENOENT if file name is not found.
2912  */
2913 static int find_file_name(struct inode_record *rec,
2914                           char *name, int *namelen)
2915 {
2916         struct inode_backref *backref;
2917
2918         list_for_each_entry(backref, &rec->backrefs, list) {
2919                 if (backref->found_dir_index || backref->found_dir_item ||
2920                     backref->found_inode_ref) {
2921                         memcpy(name, backref->name, backref->namelen);
2922                         *namelen = backref->namelen;
2923                         return 0;
2924                 }
2925         }
2926         return -ENOENT;
2927 }
2928
2929 /* Reset the nlink of the inode to the correct one */
2930 static int reset_nlink(struct btrfs_trans_handle *trans,
2931                        struct btrfs_root *root,
2932                        struct btrfs_path *path,
2933                        struct inode_record *rec)
2934 {
2935         struct inode_backref *backref;
2936         struct inode_backref *tmp;
2937         struct btrfs_key key;
2938         struct btrfs_inode_item *inode_item;
2939         int ret = 0;
2940
2941         /* We don't believe this either, reset it and iterate backref */
2942         rec->found_link = 0;
2943
2944         /* Remove all backref including the valid ones */
2945         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2946                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2947                                    backref->index, backref->name,
2948                                    backref->namelen, 0);
2949                 if (ret < 0)
2950                         goto out;
2951
2952                 /* remove invalid backref, so it won't be added back */
2953                 if (!(backref->found_dir_index &&
2954                       backref->found_dir_item &&
2955                       backref->found_inode_ref)) {
2956                         list_del(&backref->list);
2957                         free(backref);
2958                 } else {
2959                         rec->found_link++;
2960                 }
2961         }
2962
2963         /* Set nlink to 0 */
2964         key.objectid = rec->ino;
2965         key.type = BTRFS_INODE_ITEM_KEY;
2966         key.offset = 0;
2967         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2968         if (ret < 0)
2969                 goto out;
2970         if (ret > 0) {
2971                 ret = -ENOENT;
2972                 goto out;
2973         }
2974         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2975                                     struct btrfs_inode_item);
2976         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2977         btrfs_mark_buffer_dirty(path->nodes[0]);
2978         btrfs_release_path(path);
2979
2980         /*
2981          * Add back valid inode_ref/dir_item/dir_index,
2982          * add_link() will handle the nlink inc, so new nlink must be correct
2983          */
2984         list_for_each_entry(backref, &rec->backrefs, list) {
2985                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2986                                      backref->name, backref->namelen,
2987                                      backref->filetype, &backref->index, 1, 0);
2988                 if (ret < 0)
2989                         goto out;
2990         }
2991 out:
2992         btrfs_release_path(path);
2993         return ret;
2994 }
2995
2996 static int get_highest_inode(struct btrfs_trans_handle *trans,
2997                                 struct btrfs_root *root,
2998                                 struct btrfs_path *path,
2999                                 u64 *highest_ino)
3000 {
3001         struct btrfs_key key, found_key;
3002         int ret;
3003
3004         btrfs_init_path(path);
3005         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3006         key.offset = -1;
3007         key.type = BTRFS_INODE_ITEM_KEY;
3008         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3009         if (ret == 1) {
3010                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3011                                 path->slots[0] - 1);
3012                 *highest_ino = found_key.objectid;
3013                 ret = 0;
3014         }
3015         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3016                 ret = -EOVERFLOW;
3017         btrfs_release_path(path);
3018         return ret;
3019 }
3020
3021 /*
3022  * Link inode to dir 'lost+found'. Increase @ref_count.
3023  *
3024  * Returns 0 means success.
3025  * Returns <0 means failure.
3026  */
3027 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3028                                    struct btrfs_root *root,
3029                                    struct btrfs_path *path,
3030                                    u64 ino, char *namebuf, u32 name_len,
3031                                    u8 filetype, u64 *ref_count)
3032 {
3033         char *dir_name = "lost+found";
3034         u64 lost_found_ino;
3035         int ret;
3036         u32 mode = 0700;
3037
3038         btrfs_release_path(path);
3039         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3040         if (ret < 0)
3041                 goto out;
3042         lost_found_ino++;
3043
3044         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3045                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3046                           mode);
3047         if (ret < 0) {
3048                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3049                 goto out;
3050         }
3051         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3052                              namebuf, name_len, filetype, NULL, 1, 0);
3053         /*
3054          * Add ".INO" suffix several times to handle case where
3055          * "FILENAME.INO" is already taken by another file.
3056          */
3057         while (ret == -EEXIST) {
3058                 /*
3059                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3060                  */
3061                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3062                         ret = -EFBIG;
3063                         goto out;
3064                 }
3065                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3066                          ".%llu", ino);
3067                 name_len += count_digits(ino) + 1;
3068                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3069                                      name_len, filetype, NULL, 1, 0);
3070         }
3071         if (ret < 0) {
3072                 error("failed to link the inode %llu to %s dir: %s",
3073                       ino, dir_name, strerror(-ret));
3074                 goto out;
3075         }
3076
3077         ++*ref_count;
3078         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3079                name_len, namebuf, dir_name);
3080 out:
3081         btrfs_release_path(path);
3082         if (ret)
3083                 error("failed to move file '%.*s' to '%s' dir", name_len,
3084                                 namebuf, dir_name);
3085         return ret;
3086 }
3087
3088 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3089                                struct btrfs_root *root,
3090                                struct btrfs_path *path,
3091                                struct inode_record *rec)
3092 {
3093         char namebuf[BTRFS_NAME_LEN] = {0};
3094         u8 type = 0;
3095         int namelen = 0;
3096         int name_recovered = 0;
3097         int type_recovered = 0;
3098         int ret = 0;
3099
3100         /*
3101          * Get file name and type first before these invalid inode ref
3102          * are deleted by remove_all_invalid_backref()
3103          */
3104         name_recovered = !find_file_name(rec, namebuf, &namelen);
3105         type_recovered = !find_file_type(rec, &type);
3106
3107         if (!name_recovered) {
3108                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3109                        rec->ino, rec->ino);
3110                 namelen = count_digits(rec->ino);
3111                 sprintf(namebuf, "%llu", rec->ino);
3112                 name_recovered = 1;
3113         }
3114         if (!type_recovered) {
3115                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3116                        rec->ino);
3117                 type = BTRFS_FT_REG_FILE;
3118                 type_recovered = 1;
3119         }
3120
3121         ret = reset_nlink(trans, root, path, rec);
3122         if (ret < 0) {
3123                 fprintf(stderr,
3124                         "Failed to reset nlink for inode %llu: %s\n",
3125                         rec->ino, strerror(-ret));
3126                 goto out;
3127         }
3128
3129         if (rec->found_link == 0) {
3130                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3131                                               namebuf, namelen, type,
3132                                               (u64 *)&rec->found_link);
3133                 if (ret)
3134                         goto out;
3135         }
3136         printf("Fixed the nlink of inode %llu\n", rec->ino);
3137 out:
3138         /*
3139          * Clear the flag anyway, or we will loop forever for the same inode
3140          * as it will not be removed from the bad inode list and the dead loop
3141          * happens.
3142          */
3143         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3144         btrfs_release_path(path);
3145         return ret;
3146 }
3147
3148 /*
3149  * Check if there is any normal(reg or prealloc) file extent for given
3150  * ino.
3151  * This is used to determine the file type when neither its dir_index/item or
3152  * inode_item exists.
3153  *
3154  * This will *NOT* report error, if any error happens, just consider it does
3155  * not have any normal file extent.
3156  */
3157 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3158 {
3159         struct btrfs_path path;
3160         struct btrfs_key key;
3161         struct btrfs_key found_key;
3162         struct btrfs_file_extent_item *fi;
3163         u8 type;
3164         int ret = 0;
3165
3166         btrfs_init_path(&path);
3167         key.objectid = ino;
3168         key.type = BTRFS_EXTENT_DATA_KEY;
3169         key.offset = 0;
3170
3171         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3172         if (ret < 0) {
3173                 ret = 0;
3174                 goto out;
3175         }
3176         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3177                 ret = btrfs_next_leaf(root, &path);
3178                 if (ret) {
3179                         ret = 0;
3180                         goto out;
3181                 }
3182         }
3183         while (1) {
3184                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3185                                       path.slots[0]);
3186                 if (found_key.objectid != ino ||
3187                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3188                         break;
3189                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3190                                     struct btrfs_file_extent_item);
3191                 type = btrfs_file_extent_type(path.nodes[0], fi);
3192                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3193                         ret = 1;
3194                         goto out;
3195                 }
3196         }
3197 out:
3198         btrfs_release_path(&path);
3199         return ret;
3200 }
3201
3202 static u32 btrfs_type_to_imode(u8 type)
3203 {
3204         static u32 imode_by_btrfs_type[] = {
3205                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3206                 [BTRFS_FT_DIR]          = S_IFDIR,
3207                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3208                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3209                 [BTRFS_FT_FIFO]         = S_IFIFO,
3210                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3211                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3212         };
3213
3214         return imode_by_btrfs_type[(type)];
3215 }
3216
3217 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3218                                 struct btrfs_root *root,
3219                                 struct btrfs_path *path,
3220                                 struct inode_record *rec)
3221 {
3222         u8 filetype;
3223         u32 mode = 0700;
3224         int type_recovered = 0;
3225         int ret = 0;
3226
3227         printf("Trying to rebuild inode:%llu\n", rec->ino);
3228
3229         type_recovered = !find_file_type(rec, &filetype);
3230
3231         /*
3232          * Try to determine inode type if type not found.
3233          *
3234          * For found regular file extent, it must be FILE.
3235          * For found dir_item/index, it must be DIR.
3236          *
3237          * For undetermined one, use FILE as fallback.
3238          *
3239          * TODO:
3240          * 1. If found backref(inode_index/item is already handled) to it,
3241          *    it must be DIR.
3242          *    Need new inode-inode ref structure to allow search for that.
3243          */
3244         if (!type_recovered) {
3245                 if (rec->found_file_extent &&
3246                     find_normal_file_extent(root, rec->ino)) {
3247                         type_recovered = 1;
3248                         filetype = BTRFS_FT_REG_FILE;
3249                 } else if (rec->found_dir_item) {
3250                         type_recovered = 1;
3251                         filetype = BTRFS_FT_DIR;
3252                 } else if (!list_empty(&rec->orphan_extents)) {
3253                         type_recovered = 1;
3254                         filetype = BTRFS_FT_REG_FILE;
3255                 } else{
3256                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3257                                rec->ino);
3258                         type_recovered = 1;
3259                         filetype = BTRFS_FT_REG_FILE;
3260                 }
3261         }
3262
3263         ret = btrfs_new_inode(trans, root, rec->ino,
3264                               mode | btrfs_type_to_imode(filetype));
3265         if (ret < 0)
3266                 goto out;
3267
3268         /*
3269          * Here inode rebuild is done, we only rebuild the inode item,
3270          * don't repair the nlink(like move to lost+found).
3271          * That is the job of nlink repair.
3272          *
3273          * We just fill the record and return
3274          */
3275         rec->found_dir_item = 1;
3276         rec->imode = mode | btrfs_type_to_imode(filetype);
3277         rec->nlink = 0;
3278         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3279         /* Ensure the inode_nlinks repair function will be called */
3280         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3281 out:
3282         return ret;
3283 }
3284
3285 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3286                                       struct btrfs_root *root,
3287                                       struct btrfs_path *path,
3288                                       struct inode_record *rec)
3289 {
3290         struct orphan_data_extent *orphan;
3291         struct orphan_data_extent *tmp;
3292         int ret = 0;
3293
3294         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3295                 /*
3296                  * Check for conflicting file extents
3297                  *
3298                  * Here we don't know whether the extents is compressed or not,
3299                  * so we can only assume it not compressed nor data offset,
3300                  * and use its disk_len as extent length.
3301                  */
3302                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3303                                        orphan->offset, orphan->disk_len, 0);
3304                 btrfs_release_path(path);
3305                 if (ret < 0)
3306                         goto out;
3307                 if (!ret) {
3308                         fprintf(stderr,
3309                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3310                                 orphan->disk_bytenr, orphan->disk_len);
3311                         ret = btrfs_free_extent(trans,
3312                                         root->fs_info->extent_root,
3313                                         orphan->disk_bytenr, orphan->disk_len,
3314                                         0, root->objectid, orphan->objectid,
3315                                         orphan->offset);
3316                         if (ret < 0)
3317                                 goto out;
3318                 }
3319                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3320                                 orphan->offset, orphan->disk_bytenr,
3321                                 orphan->disk_len, orphan->disk_len);
3322                 if (ret < 0)
3323                         goto out;
3324
3325                 /* Update file size info */
3326                 rec->found_size += orphan->disk_len;
3327                 if (rec->found_size == rec->nbytes)
3328                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3329
3330                 /* Update the file extent hole info too */
3331                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3332                                            orphan->disk_len);
3333                 if (ret < 0)
3334                         goto out;
3335                 if (RB_EMPTY_ROOT(&rec->holes))
3336                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3337
3338                 list_del(&orphan->list);
3339                 free(orphan);
3340         }
3341         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3342 out:
3343         return ret;
3344 }
3345
3346 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3347                                         struct btrfs_root *root,
3348                                         struct btrfs_path *path,
3349                                         struct inode_record *rec)
3350 {
3351         struct rb_node *node;
3352         struct file_extent_hole *hole;
3353         int found = 0;
3354         int ret = 0;
3355
3356         node = rb_first(&rec->holes);
3357
3358         while (node) {
3359                 found = 1;
3360                 hole = rb_entry(node, struct file_extent_hole, node);
3361                 ret = btrfs_punch_hole(trans, root, rec->ino,
3362                                        hole->start, hole->len);
3363                 if (ret < 0)
3364                         goto out;
3365                 ret = del_file_extent_hole(&rec->holes, hole->start,
3366                                            hole->len);
3367                 if (ret < 0)
3368                         goto out;
3369                 if (RB_EMPTY_ROOT(&rec->holes))
3370                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3371                 node = rb_first(&rec->holes);
3372         }
3373         /* special case for a file losing all its file extent */
3374         if (!found) {
3375                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3376                                        round_up(rec->isize,
3377                                                 root->fs_info->sectorsize));
3378                 if (ret < 0)
3379                         goto out;
3380         }
3381         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3382                rec->ino, root->objectid);
3383 out:
3384         return ret;
3385 }
3386
3387 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3388 {
3389         struct btrfs_trans_handle *trans;
3390         struct btrfs_path path;
3391         int ret = 0;
3392
3393         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3394                              I_ERR_NO_ORPHAN_ITEM |
3395                              I_ERR_LINK_COUNT_WRONG |
3396                              I_ERR_NO_INODE_ITEM |
3397                              I_ERR_FILE_EXTENT_ORPHAN |
3398                              I_ERR_FILE_EXTENT_DISCOUNT|
3399                              I_ERR_FILE_NBYTES_WRONG)))
3400                 return rec->errors;
3401
3402         /*
3403          * For nlink repair, it may create a dir and add link, so
3404          * 2 for parent(256)'s dir_index and dir_item
3405          * 2 for lost+found dir's inode_item and inode_ref
3406          * 1 for the new inode_ref of the file
3407          * 2 for lost+found dir's dir_index and dir_item for the file
3408          */
3409         trans = btrfs_start_transaction(root, 7);
3410         if (IS_ERR(trans))
3411                 return PTR_ERR(trans);
3412
3413         btrfs_init_path(&path);
3414         if (rec->errors & I_ERR_NO_INODE_ITEM)
3415                 ret = repair_inode_no_item(trans, root, &path, rec);
3416         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3417                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3418         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3419                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3420         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3421                 ret = repair_inode_isize(trans, root, &path, rec);
3422         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3423                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3424         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3425                 ret = repair_inode_nlinks(trans, root, &path, rec);
3426         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3427                 ret = repair_inode_nbytes(trans, root, &path, rec);
3428         btrfs_commit_transaction(trans, root);
3429         btrfs_release_path(&path);
3430         return ret;
3431 }
3432
/*
 * Verify (and, when the file-level `repair` flag is set, try to fix) every
 * inode record collected in @inode_cache for @root.
 *
 * All records are removed from @inode_cache and freed before returning,
 * except on the early-return paths noted below.
 *
 * Returns:
 *   0        nothing to do (root has zero refs) or no error was counted
 *   -EAGAIN  a repair changed the tree; the records were dropped in stage 3
 *            or the root dir was recreated
 *   < 0      error from repair_inode_backrefs() or btrfs_start_transaction()
 *   -1       at least one inode error was counted
 */
static int check_inode_recs(struct btrfs_root *root,
                            struct cache_tree *inode_cache)
{
        struct cache_extent *cache;
        struct ptr_node *node;
        struct inode_record *rec;
        struct inode_backref *backref;
        int stage = 0;
        int ret = 0;
        int err = 0;
        u64 error = 0;
        u64 root_dirid = btrfs_root_dirid(&root->root_item);

        /* A root with zero refs is not checked; only warn about leftovers */
        if (btrfs_root_refs(&root->root_item) == 0) {
                if (!cache_tree_empty(inode_cache))
                        fprintf(stderr, "warning line %d\n", __LINE__);
                return 0;
        }

        /*
         * We need to repair backrefs first because we could change some of the
         * errors in the inode recs.
         *
         * We also need to go through and delete invalid backrefs first and then
         * add the correct ones second.  We do this because we may get EEXIST
         * when adding back the correct index because we hadn't yet deleted the
         * invalid index.
         *
         * For example, if we were missing a dir index then the directories
         * isize would be wrong, so if we fixed the isize to what we thought it
         * would be and then fixed the backref we'd still have a invalid fs, so
         * we need to add back the dir index and then check to see if the isize
         * is still wrong.
         */
        /*
         * Stages 1 and 2 call repair_inode_backrefs() with its last argument
         * set to 1 and 0 respectively.  Stage 3 only runs when an earlier
         * stage set err, and just frees every record.  Without `repair` the
         * inner loop body never executes.
         */
        while (stage < 3) {
                stage++;
                if (stage == 3 && !err)
                        break;

                cache = search_cache_extent(inode_cache, 0);
                while (repair && cache) {
                        node = container_of(cache, struct ptr_node, cache);
                        rec = node->data;
                        /* Advance first: stage 3 removes the current entry */
                        cache = next_cache_extent(cache);

                        /* Need to free everything up and rescan */
                        if (stage == 3) {
                                remove_cache_extent(inode_cache, &node->cache);
                                free(node);
                                free_inode_rec(rec);
                                continue;
                        }

                        if (list_empty(&rec->backrefs))
                                continue;

                        ret = repair_inode_backrefs(root, rec, inode_cache,
                                                    stage == 1);
                        if (ret < 0) {
                                err = ret;
                                /* Jump straight to the stage 3 cleanup */
                                stage = 2;
                                break;
                        } if (ret > 0) {
                                /* A positive return is turned into -EAGAIN */
                                err = -EAGAIN;
                        }
                }
        }
        if (err)
                return err;

        /* Look up the root directory's record (last argument 0) */
        rec = get_inode_rec(inode_cache, root_dirid, 0);
        BUG_ON(IS_ERR(rec));
        if (rec) {
                ret = check_root_dir(rec);
                if (ret) {
                        fprintf(stderr, "root %llu root dir %llu error\n",
                                (unsigned long long)root->root_key.objectid,
                                (unsigned long long)root_dirid);
                        print_inode_error(root, rec);
                        error++;
                }
        } else {
                if (repair) {
                        struct btrfs_trans_handle *trans;

                        trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
                                err = PTR_ERR(trans);
                                return err;
                        }

                        fprintf(stderr,
                                "root %llu missing its root dir, recreating\n",
                                (unsigned long long)root->objectid);

                        ret = btrfs_make_root_dir(trans, root, root_dirid);
                        BUG_ON(ret);

                        /*
                         * The commit result is not checked here; -EAGAIN is
                         * returned regardless, with the records still in
                         * @inode_cache.
                         */
                        btrfs_commit_transaction(trans, root);
                        return -EAGAIN;
                }

                fprintf(stderr, "root %llu root dir %llu not found\n",
                        (unsigned long long)root->root_key.objectid,
                        (unsigned long long)root_dirid);
        }

        /* Drain the cache: every record is detached, examined and freed */
        while (1) {
                cache = search_cache_extent(inode_cache, 0);
                if (!cache)
                        break;
                node = container_of(cache, struct ptr_node, cache);
                rec = node->data;
                remove_cache_extent(inode_cache, &node->cache);
                free(node);
                /* The root dir and the orphan objectid are freed unexamined */
                if (rec->ino == root_dirid ||
                    rec->ino == BTRFS_ORPHAN_OBJECTID) {
                        free_inode_rec(rec);
                        continue;
                }

                /* A zero return from check_orphan_item() clears the flag */
                if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
                        ret = check_orphan_item(root, rec->ino);
                        if (ret == 0)
                                rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
                        if (can_free_inode_rec(rec)) {
                                free_inode_rec(rec);
                                continue;
                        }
                }

                if (!rec->found_inode_item)
                        rec->errors |= I_ERR_NO_INODE_ITEM;
                if (rec->found_link != rec->nlink)
                        rec->errors |= I_ERR_LINK_COUNT_WRONG;
                if (repair) {
                        ret = try_repair_inode(root, rec);
                        if (ret == 0 && can_free_inode_rec(rec)) {
                                free_inode_rec(rec);
                                continue;
                        }
                        ret = 0;
                }

                /*
                 * NOTE(review): ret was just reset to 0 in repair mode, so
                 * this only counts errors when `repair` is off; an inode
                 * that could not be repaired is printed but not counted --
                 * confirm this is intended.
                 */
                if (!(repair && ret == 0))
                        error++;
                print_inode_error(root, rec);
                /* Flag whichever pieces of each backref were never found */
                list_for_each_entry(backref, &rec->backrefs, list) {
                        if (!backref->found_dir_item)
                                backref->errors |= REF_ERR_NO_DIR_ITEM;
                        if (!backref->found_dir_index)
                                backref->errors |= REF_ERR_NO_DIR_INDEX;
                        if (!backref->found_inode_ref)
                                backref->errors |= REF_ERR_NO_INODE_REF;
                        fprintf(stderr, "\tunresolved ref dir %llu index %llu"
                                " namelen %u name %s filetype %d errors %x",
                                (unsigned long long)backref->dir,
                                (unsigned long long)backref->index,
                                backref->namelen, backref->name,
                                backref->filetype, backref->errors);
                        print_ref_error(backref->errors);
                }
                free_inode_rec(rec);
        }
        return (error > 0) ? -1 : 0;
}
3599
3600 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3601                                         u64 objectid)
3602 {
3603         struct cache_extent *cache;
3604         struct root_record *rec = NULL;
3605         int ret;
3606
3607         cache = lookup_cache_extent(root_cache, objectid, 1);
3608         if (cache) {
3609                 rec = container_of(cache, struct root_record, cache);
3610         } else {
3611                 rec = calloc(1, sizeof(*rec));
3612                 if (!rec)
3613                         return ERR_PTR(-ENOMEM);
3614                 rec->objectid = objectid;
3615                 INIT_LIST_HEAD(&rec->backrefs);
3616                 rec->cache.start = objectid;
3617                 rec->cache.size = 1;
3618
3619                 ret = insert_cache_extent(root_cache, &rec->cache);
3620                 if (ret)
3621                         return ERR_PTR(-EEXIST);
3622         }
3623         return rec;
3624 }
3625
3626 static struct root_backref *get_root_backref(struct root_record *rec,
3627                                              u64 ref_root, u64 dir, u64 index,
3628                                              const char *name, int namelen)
3629 {
3630         struct root_backref *backref;
3631
3632         list_for_each_entry(backref, &rec->backrefs, list) {
3633                 if (backref->ref_root != ref_root || backref->dir != dir ||
3634                     backref->namelen != namelen)
3635                         continue;
3636                 if (memcmp(name, backref->name, namelen))
3637                         continue;
3638                 return backref;
3639         }
3640
3641         backref = calloc(1, sizeof(*backref) + namelen + 1);
3642         if (!backref)
3643                 return NULL;
3644         backref->ref_root = ref_root;
3645         backref->dir = dir;
3646         backref->index = index;
3647         backref->namelen = namelen;
3648         memcpy(backref->name, name, namelen);
3649         backref->name[namelen] = '\0';
3650         list_add_tail(&backref->list, &rec->backrefs);
3651         return backref;
3652 }
3653
3654 static void free_root_record(struct cache_extent *cache)
3655 {
3656         struct root_record *rec;
3657         struct root_backref *backref;
3658
3659         rec = container_of(cache, struct root_record, cache);
3660         while (!list_empty(&rec->backrefs)) {
3661                 backref = to_root_backref(rec->backrefs.next);
3662                 list_del(&backref->list);
3663                 free(backref);
3664         }
3665
3666         free(rec);
3667 }
3668
3669 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3670
3671 static int add_root_backref(struct cache_tree *root_cache,
3672                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3673                             const char *name, int namelen,
3674                             int item_type, int errors)
3675 {
3676         struct root_record *rec;
3677         struct root_backref *backref;
3678
3679         rec = get_root_rec(root_cache, root_id);
3680         BUG_ON(IS_ERR(rec));
3681         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3682         BUG_ON(!backref);
3683
3684         backref->errors |= errors;
3685
3686         if (item_type != BTRFS_DIR_ITEM_KEY) {
3687                 if (backref->found_dir_index || backref->found_back_ref ||
3688                     backref->found_forward_ref) {
3689                         if (backref->index != index)
3690                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3691                 } else {
3692                         backref->index = index;
3693                 }
3694         }
3695
3696         if (item_type == BTRFS_DIR_ITEM_KEY) {
3697                 if (backref->found_forward_ref)
3698                         rec->found_ref++;
3699                 backref->found_dir_item = 1;
3700         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3701                 backref->found_dir_index = 1;
3702         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3703                 if (backref->found_forward_ref)
3704                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3705                 else if (backref->found_dir_item)
3706                         rec->found_ref++;
3707                 backref->found_forward_ref = 1;
3708         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3709                 if (backref->found_back_ref)
3710                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3711                 backref->found_back_ref = 1;
3712         } else {
3713                 BUG_ON(1);
3714         }
3715
3716         if (backref->found_forward_ref && backref->found_dir_item)
3717                 backref->reachable = 1;
3718         return 0;
3719 }
3720
3721 static int merge_root_recs(struct btrfs_root *root,
3722                            struct cache_tree *src_cache,
3723                            struct cache_tree *dst_cache)
3724 {
3725         struct cache_extent *cache;
3726         struct ptr_node *node;
3727         struct inode_record *rec;
3728         struct inode_backref *backref;
3729         int ret = 0;
3730
3731         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3732                 free_inode_recs_tree(src_cache);
3733                 return 0;
3734         }
3735
3736         while (1) {
3737                 cache = search_cache_extent(src_cache, 0);
3738                 if (!cache)
3739                         break;
3740                 node = container_of(cache, struct ptr_node, cache);
3741                 rec = node->data;
3742                 remove_cache_extent(src_cache, &node->cache);
3743                 free(node);
3744
3745                 ret = is_child_root(root, root->objectid, rec->ino);
3746                 if (ret < 0)
3747                         break;
3748                 else if (ret == 0)
3749                         goto skip;
3750
3751                 list_for_each_entry(backref, &rec->backrefs, list) {
3752                         BUG_ON(backref->found_inode_ref);
3753                         if (backref->found_dir_item)
3754                                 add_root_backref(dst_cache, rec->ino,
3755                                         root->root_key.objectid, backref->dir,
3756                                         backref->index, backref->name,
3757                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3758                                         backref->errors);
3759                         if (backref->found_dir_index)
3760                                 add_root_backref(dst_cache, rec->ino,
3761                                         root->root_key.objectid, backref->dir,
3762                                         backref->index, backref->name,
3763                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3764                                         backref->errors);
3765                 }
3766 skip:
3767                 free_inode_rec(rec);
3768         }
3769         if (ret < 0)
3770                 return ret;
3771         return 0;
3772 }
3773
3774 static int check_root_refs(struct btrfs_root *root,
3775                            struct cache_tree *root_cache)
3776 {
3777         struct root_record *rec;
3778         struct root_record *ref_root;
3779         struct root_backref *backref;
3780         struct cache_extent *cache;
3781         int loop = 1;
3782         int ret;
3783         int error;
3784         int errors = 0;
3785
3786         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3787         BUG_ON(IS_ERR(rec));
3788         rec->found_ref = 1;
3789
3790         /* fixme: this can not detect circular references */
3791         while (loop) {
3792                 loop = 0;
3793                 cache = search_cache_extent(root_cache, 0);
3794                 while (1) {
3795                         if (!cache)
3796                                 break;
3797                         rec = container_of(cache, struct root_record, cache);
3798                         cache = next_cache_extent(cache);
3799
3800                         if (rec->found_ref == 0)
3801                                 continue;
3802
3803                         list_for_each_entry(backref, &rec->backrefs, list) {
3804                                 if (!backref->reachable)
3805                                         continue;
3806
3807                                 ref_root = get_root_rec(root_cache,
3808                                                         backref->ref_root);
3809                                 BUG_ON(IS_ERR(ref_root));
3810                                 if (ref_root->found_ref > 0)
3811                                         continue;
3812
3813                                 backref->reachable = 0;
3814                                 rec->found_ref--;
3815                                 if (rec->found_ref == 0)
3816                                         loop = 1;
3817                         }
3818                 }
3819         }
3820
3821         cache = search_cache_extent(root_cache, 0);
3822         while (1) {
3823                 if (!cache)
3824                         break;
3825                 rec = container_of(cache, struct root_record, cache);
3826                 cache = next_cache_extent(cache);
3827
3828                 if (rec->found_ref == 0 &&
3829                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3830                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3831                         ret = check_orphan_item(root->fs_info->tree_root,
3832                                                 rec->objectid);
3833                         if (ret == 0)
3834                                 continue;
3835
3836                         /*
3837                          * If we don't have a root item then we likely just have
3838                          * a dir item in a snapshot for this root but no actual
3839                          * ref key or anything so it's meaningless.
3840                          */
3841                         if (!rec->found_root_item)
3842                                 continue;
3843                         errors++;
3844                         fprintf(stderr, "fs tree %llu not referenced\n",
3845                                 (unsigned long long)rec->objectid);
3846                 }
3847
3848                 error = 0;
3849                 if (rec->found_ref > 0 && !rec->found_root_item)
3850                         error = 1;
3851                 list_for_each_entry(backref, &rec->backrefs, list) {
3852                         if (!backref->found_dir_item)
3853                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3854                         if (!backref->found_dir_index)
3855                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3856                         if (!backref->found_back_ref)
3857                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3858                         if (!backref->found_forward_ref)
3859                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3860                         if (backref->reachable && backref->errors)
3861                                 error = 1;
3862                 }
3863                 if (!error)
3864                         continue;
3865
3866                 errors++;
3867                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3868                         (unsigned long long)rec->objectid, rec->found_ref,
3869                          rec->found_root_item ? "" : "not found");
3870
3871                 list_for_each_entry(backref, &rec->backrefs, list) {
3872                         if (!backref->reachable)
3873                                 continue;
3874                         if (!backref->errors && rec->found_root_item)
3875                                 continue;
3876                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3877                                 " index %llu namelen %u name %s errors %x\n",
3878                                 (unsigned long long)backref->ref_root,
3879                                 (unsigned long long)backref->dir,
3880                                 (unsigned long long)backref->index,
3881                                 backref->namelen, backref->name,
3882                                 backref->errors);
3883                         print_ref_error(backref->errors);
3884                 }
3885         }
3886         return errors > 0 ? 1 : 0;
3887 }
3888
3889 static int process_root_ref(struct extent_buffer *eb, int slot,
3890                             struct btrfs_key *key,
3891                             struct cache_tree *root_cache)
3892 {
3893         u64 dirid;
3894         u64 index;
3895         u32 len;
3896         u32 name_len;
3897         struct btrfs_root_ref *ref;
3898         char namebuf[BTRFS_NAME_LEN];
3899         int error;
3900
3901         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3902
3903         dirid = btrfs_root_ref_dirid(eb, ref);
3904         index = btrfs_root_ref_sequence(eb, ref);
3905         name_len = btrfs_root_ref_name_len(eb, ref);
3906
3907         if (name_len <= BTRFS_NAME_LEN) {
3908                 len = name_len;
3909                 error = 0;
3910         } else {
3911                 len = BTRFS_NAME_LEN;
3912                 error = REF_ERR_NAME_TOO_LONG;
3913         }
3914         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3915
3916         if (key->type == BTRFS_ROOT_REF_KEY) {
3917                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3918                                  index, namebuf, len, key->type, error);
3919         } else {
3920                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3921                                  index, namebuf, len, key->type, error);
3922         }
3923         return 0;
3924 }
3925
3926 static void free_corrupt_block(struct cache_extent *cache)
3927 {
3928         struct btrfs_corrupt_block *corrupt;
3929
3930         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3931         free(corrupt);
3932 }
3933
3934 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3935
3936 /*
3937  * Repair the btree of the given root.
3938  *
3939  * The fix is to remove the node key in corrupt_blocks cache_tree.
3940  * and rebalance the tree.
3941  * After the fix, the btree should be writeable.
3942  */
3943 static int repair_btree(struct btrfs_root *root,
3944                         struct cache_tree *corrupt_blocks)
3945 {
3946         struct btrfs_trans_handle *trans;
3947         struct btrfs_path path;
3948         struct btrfs_corrupt_block *corrupt;
3949         struct cache_extent *cache;
3950         struct btrfs_key key;
3951         u64 offset;
3952         int level;
3953         int ret = 0;
3954
3955         if (cache_tree_empty(corrupt_blocks))
3956                 return 0;
3957
3958         trans = btrfs_start_transaction(root, 1);
3959         if (IS_ERR(trans)) {
3960                 ret = PTR_ERR(trans);
3961                 fprintf(stderr, "Error starting transaction: %s\n",
3962                         strerror(-ret));
3963                 return ret;
3964         }
3965         btrfs_init_path(&path);
3966         cache = first_cache_extent(corrupt_blocks);
3967         while (cache) {
3968                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3969                                        cache);
3970                 level = corrupt->level;
3971                 path.lowest_level = level;
3972                 key.objectid = corrupt->key.objectid;
3973                 key.type = corrupt->key.type;
3974                 key.offset = corrupt->key.offset;
3975
3976                 /*
3977                  * Here we don't want to do any tree balance, since it may
3978                  * cause a balance with corrupted brother leaf/node,
3979                  * so ins_len set to 0 here.
3980                  * Balance will be done after all corrupt node/leaf is deleted.
3981                  */
3982                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3983                 if (ret < 0)
3984                         goto out;
3985                 offset = btrfs_node_blockptr(path.nodes[level],
3986                                              path.slots[level]);
3987
3988                 /* Remove the ptr */
3989                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3990                 if (ret < 0)
3991                         goto out;
3992                 /*
3993                  * Remove the corresponding extent
3994                  * return value is not concerned.
3995                  */
3996                 btrfs_release_path(&path);
3997                 ret = btrfs_free_extent(trans, root, offset,
3998                                 root->fs_info->nodesize, 0,
3999                                 root->root_key.objectid, level - 1, 0);
4000                 cache = next_cache_extent(cache);
4001         }
4002
4003         /* Balance the btree using btrfs_search_slot() */
4004         cache = first_cache_extent(corrupt_blocks);
4005         while (cache) {
4006                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4007                                        cache);
4008                 memcpy(&key, &corrupt->key, sizeof(key));
4009                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4010                 if (ret < 0)
4011                         goto out;
4012                 /* return will always >0 since it won't find the item */
4013                 ret = 0;
4014                 btrfs_release_path(&path);
4015                 cache = next_cache_extent(cache);
4016         }
4017 out:
4018         btrfs_commit_transaction(trans, root);
4019         btrfs_release_path(&path);
4020         return ret;
4021 }
4022
4023 static int check_fs_root(struct btrfs_root *root,
4024                          struct cache_tree *root_cache,
4025                          struct walk_control *wc)
4026 {
4027         int ret = 0;
4028         int err = 0;
4029         int wret;
4030         int level;
4031         struct btrfs_path path;
4032         struct shared_node root_node;
4033         struct root_record *rec;
4034         struct btrfs_root_item *root_item = &root->root_item;
4035         struct cache_tree corrupt_blocks;
4036         struct orphan_data_extent *orphan;
4037         struct orphan_data_extent *tmp;
4038         enum btrfs_tree_block_status status;
4039         struct node_refs nrefs;
4040
4041         /*
4042          * Reuse the corrupt_block cache tree to record corrupted tree block
4043          *
4044          * Unlike the usage in extent tree check, here we do it in a per
4045          * fs/subvol tree base.
4046          */
4047         cache_tree_init(&corrupt_blocks);
4048         root->fs_info->corrupt_blocks = &corrupt_blocks;
4049
4050         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4051                 rec = get_root_rec(root_cache, root->root_key.objectid);
4052                 BUG_ON(IS_ERR(rec));
4053                 if (btrfs_root_refs(root_item) > 0)
4054                         rec->found_root_item = 1;
4055         }
4056
4057         btrfs_init_path(&path);
4058         memset(&root_node, 0, sizeof(root_node));
4059         cache_tree_init(&root_node.root_cache);
4060         cache_tree_init(&root_node.inode_cache);
4061         memset(&nrefs, 0, sizeof(nrefs));
4062
4063         /* Move the orphan extent record to corresponding inode_record */
4064         list_for_each_entry_safe(orphan, tmp,
4065                                  &root->orphan_data_extents, list) {
4066                 struct inode_record *inode;
4067
4068                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4069                                       1);
4070                 BUG_ON(IS_ERR(inode));
4071                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4072                 list_move(&orphan->list, &inode->orphan_extents);
4073         }
4074
4075         level = btrfs_header_level(root->node);
4076         memset(wc->nodes, 0, sizeof(wc->nodes));
4077         wc->nodes[level] = &root_node;
4078         wc->active_node = level;
4079         wc->root_level = level;
4080
4081         /* We may not have checked the root block, lets do that now */
4082         if (btrfs_is_leaf(root->node))
4083                 status = btrfs_check_leaf(root, NULL, root->node);
4084         else
4085                 status = btrfs_check_node(root, NULL, root->node);
4086         if (status != BTRFS_TREE_BLOCK_CLEAN)
4087                 return -EIO;
4088
4089         if (btrfs_root_refs(root_item) > 0 ||
4090             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4091                 path.nodes[level] = root->node;
4092                 extent_buffer_get(root->node);
4093                 path.slots[level] = 0;
4094         } else {
4095                 struct btrfs_key key;
4096                 struct btrfs_disk_key found_key;
4097
4098                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4099                 level = root_item->drop_level;
4100                 path.lowest_level = level;
4101                 if (level > btrfs_header_level(root->node) ||
4102                     level >= BTRFS_MAX_LEVEL) {
4103                         error("ignoring invalid drop level: %u", level);
4104                         goto skip_walking;
4105                 }
4106                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4107                 if (wret < 0)
4108                         goto skip_walking;
4109                 btrfs_node_key(path.nodes[level], &found_key,
4110                                 path.slots[level]);
4111                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4112                                         sizeof(found_key)));
4113         }
4114
4115         while (1) {
4116                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4117                 if (wret < 0)
4118                         ret = wret;
4119                 if (wret != 0)
4120                         break;
4121
4122                 wret = walk_up_tree(root, &path, wc, &level);
4123                 if (wret < 0)
4124                         ret = wret;
4125                 if (wret != 0)
4126                         break;
4127         }
4128 skip_walking:
4129         btrfs_release_path(&path);
4130
4131         if (!cache_tree_empty(&corrupt_blocks)) {
4132                 struct cache_extent *cache;
4133                 struct btrfs_corrupt_block *corrupt;
4134
4135                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4136                        root->root_key.objectid);
4137                 cache = first_cache_extent(&corrupt_blocks);
4138                 while (cache) {
4139                         corrupt = container_of(cache,
4140                                                struct btrfs_corrupt_block,
4141                                                cache);
4142                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4143                                cache->start, corrupt->level,
4144                                corrupt->key.objectid, corrupt->key.type,
4145                                corrupt->key.offset);
4146                         cache = next_cache_extent(cache);
4147                 }
4148                 if (repair) {
4149                         printf("Try to repair the btree for root %llu\n",
4150                                root->root_key.objectid);
4151                         ret = repair_btree(root, &corrupt_blocks);
4152                         if (ret < 0)
4153                                 fprintf(stderr, "Failed to repair btree: %s\n",
4154                                         strerror(-ret));
4155                         if (!ret)
4156                                 printf("Btree for root %llu is fixed\n",
4157                                        root->root_key.objectid);
4158                 }
4159         }
4160
4161         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4162         if (err < 0)
4163                 ret = err;
4164
4165         if (root_node.current) {
4166                 root_node.current->checked = 1;
4167                 maybe_free_inode_rec(&root_node.inode_cache,
4168                                 root_node.current);
4169         }
4170
4171         err = check_inode_recs(root, &root_node.inode_cache);
4172         if (!ret)
4173                 ret = err;
4174
4175         free_corrupt_blocks_tree(&corrupt_blocks);
4176         root->fs_info->corrupt_blocks = NULL;
4177         free_orphan_data_extents(&root->orphan_data_extents);
4178         return ret;
4179 }
4180
4181 static int fs_root_objectid(u64 objectid)
4182 {
4183         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4184             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4185                 return 1;
4186         return is_fstree(objectid);
4187 }
4188
4189 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4190                           struct cache_tree *root_cache)
4191 {
4192         struct btrfs_path path;
4193         struct btrfs_key key;
4194         struct walk_control wc;
4195         struct extent_buffer *leaf, *tree_node;
4196         struct btrfs_root *tmp_root;
4197         struct btrfs_root *tree_root = fs_info->tree_root;
4198         int ret;
4199         int err = 0;
4200
4201         if (ctx.progress_enabled) {
4202                 ctx.tp = TASK_FS_ROOTS;
4203                 task_start(ctx.info);
4204         }
4205
4206         /*
4207          * Just in case we made any changes to the extent tree that weren't
4208          * reflected into the free space cache yet.
4209          */
4210         if (repair)
4211                 reset_cached_block_groups(fs_info);
4212         memset(&wc, 0, sizeof(wc));
4213         cache_tree_init(&wc.shared);
4214         btrfs_init_path(&path);
4215
4216 again:
4217         key.offset = 0;
4218         key.objectid = 0;
4219         key.type = BTRFS_ROOT_ITEM_KEY;
4220         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4221         if (ret < 0) {
4222                 err = 1;
4223                 goto out;
4224         }
4225         tree_node = tree_root->node;
4226         while (1) {
4227                 if (tree_node != tree_root->node) {
4228                         free_root_recs_tree(root_cache);
4229                         btrfs_release_path(&path);
4230                         goto again;
4231                 }
4232                 leaf = path.nodes[0];
4233                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4234                         ret = btrfs_next_leaf(tree_root, &path);
4235                         if (ret) {
4236                                 if (ret < 0)
4237                                         err = 1;
4238                                 break;
4239                         }
4240                         leaf = path.nodes[0];
4241                 }
4242                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4243                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4244                     fs_root_objectid(key.objectid)) {
4245                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4246                                 tmp_root = btrfs_read_fs_root_no_cache(
4247                                                 fs_info, &key);
4248                         } else {
4249                                 key.offset = (u64)-1;
4250                                 tmp_root = btrfs_read_fs_root(
4251                                                 fs_info, &key);
4252                         }
4253                         if (IS_ERR(tmp_root)) {
4254                                 err = 1;
4255                                 goto next;
4256                         }
4257                         ret = check_fs_root(tmp_root, root_cache, &wc);
4258                         if (ret == -EAGAIN) {
4259                                 free_root_recs_tree(root_cache);
4260                                 btrfs_release_path(&path);
4261                                 goto again;
4262                         }
4263                         if (ret)
4264                                 err = 1;
4265                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4266                                 btrfs_free_fs_root(tmp_root);
4267                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4268                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4269                         process_root_ref(leaf, path.slots[0], &key,
4270                                          root_cache);
4271                 }
4272 next:
4273                 path.slots[0]++;
4274         }
4275 out:
4276         btrfs_release_path(&path);
4277         if (err)
4278                 free_extent_cache_tree(&wc.shared);
4279         if (!cache_tree_empty(&wc.shared))
4280                 fprintf(stderr, "warning line %d\n", __LINE__);
4281
4282         task_stop(ctx.info);
4283
4284         return err;
4285 }
4286
4287 /*
4288  * Find the @index according by @ino and name.
4289  * Notice:time efficiency is O(N)
4290  *
4291  * @root:       the root of the fs/file tree
4292  * @index_ret:  the index as return value
4293  * @namebuf:    the name to match
4294  * @name_len:   the length of name to match
4295  * @file_type:  the file_type of INODE_ITEM to match
4296  *
4297  * Returns 0 if found and *@index_ret will be modified with right value
4298  * Returns< 0 not found and *@index_ret will be (u64)-1
4299  */
4300 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4301                           u64 *index_ret, char *namebuf, u32 name_len,
4302                           u8 file_type)
4303 {
4304         struct btrfs_path path;
4305         struct extent_buffer *node;
4306         struct btrfs_dir_item *di;
4307         struct btrfs_key key;
4308         struct btrfs_key location;
4309         char name[BTRFS_NAME_LEN] = {0};
4310
4311         u32 total;
4312         u32 cur = 0;
4313         u32 len;
4314         u32 data_len;
4315         u8 filetype;
4316         int slot;
4317         int ret;
4318
4319         ASSERT(index_ret);
4320
4321         /* search from the last index */
4322         key.objectid = dirid;
4323         key.offset = (u64)-1;
4324         key.type = BTRFS_DIR_INDEX_KEY;
4325
4326         btrfs_init_path(&path);
4327         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4328         if (ret < 0)
4329                 return ret;
4330
4331 loop:
4332         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4333         if (ret) {
4334                 ret = -ENOENT;
4335                 *index_ret = (64)-1;
4336                 goto out;
4337         }
4338         /* Check whether inode_id/filetype/name match */
4339         node = path.nodes[0];
4340         slot = path.slots[0];
4341         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4342         total = btrfs_item_size_nr(node, slot);
4343         while (cur < total) {
4344                 ret = -ENOENT;
4345                 len = btrfs_dir_name_len(node, di);
4346                 data_len = btrfs_dir_data_len(node, di);
4347
4348                 btrfs_dir_item_key_to_cpu(node, di, &location);
4349                 if (location.objectid != location_id ||
4350                     location.type != BTRFS_INODE_ITEM_KEY ||
4351                     location.offset != 0)
4352                         goto next;
4353
4354                 filetype = btrfs_dir_type(node, di);
4355                 if (file_type != filetype)
4356                         goto next;
4357
4358                 if (len > BTRFS_NAME_LEN)
4359                         len = BTRFS_NAME_LEN;
4360
4361                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4362                 if (len != name_len || strncmp(namebuf, name, len))
4363                         goto next;
4364
4365                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4366                 *index_ret = key.offset;
4367                 ret = 0;
4368                 goto out;
4369 next:
4370                 len += sizeof(*di) + data_len;
4371                 di = (struct btrfs_dir_item *)((char *)di + len);
4372                 cur += len;
4373         }
4374         goto loop;
4375
4376 out:
4377         btrfs_release_path(&path);
4378         return ret;
4379 }
4380
4381 /*
4382  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4383  * INODE_REF/INODE_EXTREF match.
4384  *
4385  * @root:       the root of the fs/file tree
4386  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4387  *              value while find index
4388  * @location_key: location key of the struct btrfs_dir_item to match
4389  * @name:       the name to match
4390  * @namelen:    the length of name
4391  * @file_type:  the type of file to math
4392  *
4393  * Return 0 if no error occurred.
4394  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4395  * DIR_ITEM/DIR_INDEX
4396  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4397  * and DIR_ITEM/DIR_INDEX mismatch
4398  */
4399 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4400                          struct btrfs_key *location_key, char *name,
4401                          u32 namelen, u8 file_type)
4402 {
4403         struct btrfs_path path;
4404         struct extent_buffer *node;
4405         struct btrfs_dir_item *di;
4406         struct btrfs_key location;
4407         char namebuf[BTRFS_NAME_LEN] = {0};
4408         u32 total;
4409         u32 cur = 0;
4410         u32 len;
4411         u32 data_len;
4412         u8 filetype;
4413         int slot;
4414         int ret;
4415
4416         /* get the index by traversing all index */
4417         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4418                 ret = find_dir_index(root, key->objectid,
4419                                      location_key->objectid, &key->offset,
4420                                      name, namelen, file_type);
4421                 if (ret)
4422                         ret = DIR_INDEX_MISSING;
4423                 return ret;
4424         }
4425
4426         btrfs_init_path(&path);
4427         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4428         if (ret) {
4429                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4430                         DIR_INDEX_MISSING;
4431                 goto out;
4432         }
4433
4434         /* Check whether inode_id/filetype/name match */
4435         node = path.nodes[0];
4436         slot = path.slots[0];
4437         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4438         total = btrfs_item_size_nr(node, slot);
4439         while (cur < total) {
4440                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4441                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4442
4443                 len = btrfs_dir_name_len(node, di);
4444                 data_len = btrfs_dir_data_len(node, di);
4445
4446                 btrfs_dir_item_key_to_cpu(node, di, &location);
4447                 if (location.objectid != location_key->objectid ||
4448                     location.type != location_key->type ||
4449                     location.offset != location_key->offset)
4450                         goto next;
4451
4452                 filetype = btrfs_dir_type(node, di);
4453                 if (file_type != filetype)
4454                         goto next;
4455
4456                 if (len > BTRFS_NAME_LEN) {
4457                         len = BTRFS_NAME_LEN;
4458                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4459                         root->objectid,
4460                         key->type == BTRFS_DIR_ITEM_KEY ?
4461                         "DIR_ITEM" : "DIR_INDEX",
4462                         key->objectid, key->offset, len);
4463                 }
4464                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4465                                    len);
4466                 if (len != namelen || strncmp(namebuf, name, len))
4467                         goto next;
4468
4469                 ret = 0;
4470                 goto out;
4471 next:
4472                 len += sizeof(*di) + data_len;
4473                 di = (struct btrfs_dir_item *)((char *)di + len);
4474                 cur += len;
4475         }
4476
4477 out:
4478         btrfs_release_path(&path);
4479         return ret;
4480 }
4481
4482 /*
4483  * Prints inode ref error message
4484  */
4485 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4486                                 u64 index, const char *namebuf, int name_len,
4487                                 u8 filetype, int err)
4488 {
4489         if (!err)
4490                 return;
4491
4492         /* root dir error */
4493         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4494                 error(
4495         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4496                       root->objectid, key->objectid, key->offset, namebuf);
4497                 return;
4498         }
4499
4500         /* normal error */
4501         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4502                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4503                       root->objectid, key->offset,
4504                       btrfs_name_hash(namebuf, name_len),
4505                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4506                       namebuf, filetype);
4507         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4508                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4509                       root->objectid, key->offset, index,
4510                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4511                       namebuf, filetype);
4512 }
4513
4514 /*
4515  * Insert the missing inode item.
4516  *
4517  * Returns 0 means success.
4518  * Returns <0 means error.
4519  */
4520 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4521                                      u8 filetype)
4522 {
4523         struct btrfs_key key;
4524         struct btrfs_trans_handle *trans;
4525         struct btrfs_path path;
4526         int ret;
4527
4528         key.objectid = ino;
4529         key.type = BTRFS_INODE_ITEM_KEY;
4530         key.offset = 0;
4531
4532         btrfs_init_path(&path);
4533         trans = btrfs_start_transaction(root, 1);
4534         if (IS_ERR(trans)) {
4535                 ret = -EIO;
4536                 goto out;
4537         }
4538
4539         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4540         if (ret < 0 || !ret)
4541                 goto fail;
4542
4543         /* insert inode item */
4544         create_inode_item_lowmem(trans, root, ino, filetype);
4545         ret = 0;
4546 fail:
4547         btrfs_commit_transaction(trans, root);
4548 out:
4549         if (ret)
4550                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4551                       root->objectid, ino);
4552         btrfs_release_path(&path);
4553         return ret;
4554 }
4555
4556 /*
4557  * The ternary means dir item, dir index and relative inode ref.
4558  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4559  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4560  * strategy:
4561  * If two of three is missing or mismatched, delete the existing one.
4562  * If one of three is missing or mismatched, add the missing one.
4563  *
4564  * returns 0 means success.
4565  * returns not 0 means on error;
4566  */
4567 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4568                           u64 index, char *name, int name_len, u8 filetype,
4569                           int err)
4570 {
4571         struct btrfs_trans_handle *trans;
4572         int stage = 0;
4573         int ret = 0;
4574
4575         /*
4576          * stage shall be one of following valild values:
4577          *      0: Fine, nothing to do.
4578          *      1: One of three is wrong, so add missing one.
4579          *      2: Two of three is wrong, so delete existed one.
4580          */
4581         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4582                 stage++;
4583         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4584                 stage++;
4585         if (err & (INODE_REF_MISSING))
4586                 stage++;
4587
4588         /* stage must be smllarer than 3 */
4589         ASSERT(stage < 3);
4590
4591         trans = btrfs_start_transaction(root, 1);
4592         if (stage == 2) {
4593                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4594                                    name_len, 0);
4595                 goto out;
4596         }
4597         if (stage == 1) {
4598                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4599                                filetype, &index, 1, 1);
4600                 goto out;
4601         }
4602 out:
4603         btrfs_commit_transaction(trans, root);
4604
4605         if (ret)
4606                 error("fail to repair inode %llu name %s filetype %u",
4607                       ino, name, filetype);
4608         else
4609                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4610                        stage == 2 ? "Delete" : "Add",
4611                        ino, name, filetype);
4612
4613         return ret;
4614 }
4615
4616 /*
4617  * Traverse the given INODE_REF and call find_dir_item() to find related
4618  * DIR_ITEM/DIR_INDEX.
4619  *
4620  * @root:       the root of the fs/file tree
4621  * @ref_key:    the key of the INODE_REF
4622  * @path        the path provides node and slot
4623  * @refs:       the count of INODE_REF
4624  * @mode:       the st_mode of INODE_ITEM
4625  * @name_ret:   returns with the first ref's name
4626  * @name_len_ret:    len of the name_ret
4627  *
4628  * Return 0 if no error occurred.
4629  */
4630 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4631                            struct btrfs_path *path, char *name_ret,
4632                            u32 *namelen_ret, u64 *refs_ret, int mode)
4633 {
4634         struct btrfs_key key;
4635         struct btrfs_key location;
4636         struct btrfs_inode_ref *ref;
4637         struct extent_buffer *node;
4638         char namebuf[BTRFS_NAME_LEN] = {0};
4639         u32 total;
4640         u32 cur = 0;
4641         u32 len;
4642         u32 name_len;
4643         u64 index;
4644         int ret;
4645         int err = 0;
4646         int tmp_err;
4647         int slot;
4648         int need_research = 0;
4649         u64 refs;
4650
4651 begin:
4652         err = 0;
4653         cur = 0;
4654         refs = *refs_ret;
4655
4656         /* since after repair, path and the dir item may be changed */
4657         if (need_research) {
4658                 need_research = 0;
4659                 btrfs_release_path(path);
4660                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4661                 /* the item was deleted, let path point to the last checked item */
4662                 if (ret > 0) {
4663                         if (path->slots[0] == 0)
4664                                 btrfs_prev_leaf(root, path);
4665                         else
4666                                 path->slots[0]--;
4667                 }
4668                 if (ret)
4669                         goto out;
4670         }
4671
4672         location.objectid = ref_key->objectid;
4673         location.type = BTRFS_INODE_ITEM_KEY;
4674         location.offset = 0;
4675         node = path->nodes[0];
4676         slot = path->slots[0];
4677
4678         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4679         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4680         total = btrfs_item_size_nr(node, slot);
4681
4682 next:
4683         /* Update inode ref count */
4684         refs++;
4685         tmp_err = 0;
4686         index = btrfs_inode_ref_index(node, ref);
4687         name_len = btrfs_inode_ref_name_len(node, ref);
4688
4689         if (name_len <= BTRFS_NAME_LEN) {
4690                 len = name_len;
4691         } else {
4692                 len = BTRFS_NAME_LEN;
4693                 warning("root %llu INODE_REF[%llu %llu] name too long",
4694                         root->objectid, ref_key->objectid, ref_key->offset);
4695         }
4696
4697         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4698
4699         /* copy the first name found to name_ret */
4700         if (refs == 1 && name_ret) {
4701                 memcpy(name_ret, namebuf, len);
4702                 *namelen_ret = len;
4703         }
4704
4705         /* Check root dir ref */
4706         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4707                 if (index != 0 || len != strlen("..") ||
4708                     strncmp("..", namebuf, len) ||
4709                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4710                         /* set err bits then repair will delete the ref */
4711                         err |= DIR_INDEX_MISSING;
4712                         err |= DIR_ITEM_MISSING;
4713                 }
4714                 goto end;
4715         }
4716
4717         /* Find related DIR_INDEX */
4718         key.objectid = ref_key->offset;
4719         key.type = BTRFS_DIR_INDEX_KEY;
4720         key.offset = index;
4721         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4722                             imode_to_type(mode));
4723
4724         /* Find related dir_item */
4725         key.objectid = ref_key->offset;
4726         key.type = BTRFS_DIR_ITEM_KEY;
4727         key.offset = btrfs_name_hash(namebuf, len);
4728         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4729                             imode_to_type(mode));
4730 end:
4731         if (tmp_err && repair) {
4732                 ret = repair_ternary_lowmem(root, ref_key->offset,
4733                                             ref_key->objectid, index, namebuf,
4734                                             name_len, imode_to_type(mode),
4735                                             tmp_err);
4736                 if (!ret) {
4737                         need_research = 1;
4738                         goto begin;
4739                 }
4740         }
4741         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4742                             imode_to_type(mode), tmp_err);
4743         err |= tmp_err;
4744         len = sizeof(*ref) + name_len;
4745         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4746         cur += len;
4747         if (cur < total)
4748                 goto next;
4749
4750 out:
4751         *refs_ret = refs;
4752         return err;
4753 }
4754
4755 /*
4756  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4757  * DIR_ITEM/DIR_INDEX.
4758  *
4759  * @root:       the root of the fs/file tree
4760  * @ref_key:    the key of the INODE_EXTREF
4761  * @refs:       the count of INODE_EXTREF
4762  * @mode:       the st_mode of INODE_ITEM
4763  *
4764  * Return 0 if no error occurred.
4765  */
4766 static int check_inode_extref(struct btrfs_root *root,
4767                               struct btrfs_key *ref_key,
4768                               struct extent_buffer *node, int slot, u64 *refs,
4769                               int mode)
4770 {
4771         struct btrfs_key key;
4772         struct btrfs_key location;
4773         struct btrfs_inode_extref *extref;
4774         char namebuf[BTRFS_NAME_LEN] = {0};
4775         u32 total;
4776         u32 cur = 0;
4777         u32 len;
4778         u32 name_len;
4779         u64 index;
4780         u64 parent;
4781         int ret;
4782         int err = 0;
4783
4784         location.objectid = ref_key->objectid;
4785         location.type = BTRFS_INODE_ITEM_KEY;
4786         location.offset = 0;
4787
4788         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4789         total = btrfs_item_size_nr(node, slot);
4790
4791 next:
4792         /* update inode ref count */
4793         (*refs)++;
4794         name_len = btrfs_inode_extref_name_len(node, extref);
4795         index = btrfs_inode_extref_index(node, extref);
4796         parent = btrfs_inode_extref_parent(node, extref);
4797         if (name_len <= BTRFS_NAME_LEN) {
4798                 len = name_len;
4799         } else {
4800                 len = BTRFS_NAME_LEN;
4801                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4802                         root->objectid, ref_key->objectid, ref_key->offset);
4803         }
4804         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4805
4806         /* Check root dir ref name */
4807         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4808                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4809                       root->objectid, ref_key->objectid, ref_key->offset,
4810                       namebuf);
4811                 err |= ROOT_DIR_ERROR;
4812         }
4813
4814         /* find related dir_index */
4815         key.objectid = parent;
4816         key.type = BTRFS_DIR_INDEX_KEY;
4817         key.offset = index;
4818         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4819         err |= ret;
4820
4821         /* find related dir_item */
4822         key.objectid = parent;
4823         key.type = BTRFS_DIR_ITEM_KEY;
4824         key.offset = btrfs_name_hash(namebuf, len);
4825         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4826         err |= ret;
4827
4828         len = sizeof(*extref) + name_len;
4829         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4830         cur += len;
4831
4832         if (cur < total)
4833                 goto next;
4834
4835         return err;
4836 }
4837
4838 /*
4839  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4840  * DIR_ITEM/DIR_INDEX match.
4841  * Return with @index_ret.
4842  *
4843  * @root:       the root of the fs/file tree
4844  * @key:        the key of the INODE_REF/INODE_EXTREF
4845  * @name:       the name in the INODE_REF/INODE_EXTREF
4846  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4847  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4848  *              value (64)-1 means do not check index
4849  * @ext_ref:    the EXTENDED_IREF feature
4850  *
4851  * Return 0 if no error occurred.
4852  * Return >0 for error bitmap
4853  */
4854 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4855                           char *name, int namelen, u64 *index_ret,
4856                           unsigned int ext_ref)
4857 {
4858         struct btrfs_path path;
4859         struct btrfs_inode_ref *ref;
4860         struct btrfs_inode_extref *extref;
4861         struct extent_buffer *node;
4862         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4863         u32 total;
4864         u32 cur = 0;
4865         u32 len;
4866         u32 ref_namelen;
4867         u64 ref_index;
4868         u64 parent;
4869         u64 dir_id;
4870         int slot;
4871         int ret;
4872
4873         ASSERT(index_ret);
4874
4875         btrfs_init_path(&path);
4876         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4877         if (ret) {
4878                 ret = INODE_REF_MISSING;
4879                 goto extref;
4880         }
4881
4882         node = path.nodes[0];
4883         slot = path.slots[0];
4884
4885         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4886         total = btrfs_item_size_nr(node, slot);
4887
4888         /* Iterate all entry of INODE_REF */
4889         while (cur < total) {
4890                 ret = INODE_REF_MISSING;
4891
4892                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4893                 ref_index = btrfs_inode_ref_index(node, ref);
4894                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4895                         goto next_ref;
4896
4897                 if (cur + sizeof(*ref) + ref_namelen > total ||
4898                     ref_namelen > BTRFS_NAME_LEN) {
4899                         warning("root %llu INODE %s[%llu %llu] name too long",
4900                                 root->objectid,
4901                                 key->type == BTRFS_INODE_REF_KEY ?
4902                                         "REF" : "EXTREF",
4903                                 key->objectid, key->offset);
4904
4905                         if (cur + sizeof(*ref) > total)
4906                                 break;
4907                         len = min_t(u32, total - cur - sizeof(*ref),
4908                                     BTRFS_NAME_LEN);
4909                 } else {
4910                         len = ref_namelen;
4911                 }
4912
4913                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4914                                    len);
4915
4916                 if (len != namelen || strncmp(ref_namebuf, name, len))
4917                         goto next_ref;
4918
4919                 *index_ret = ref_index;
4920                 ret = 0;
4921                 goto out;
4922 next_ref:
4923                 len = sizeof(*ref) + ref_namelen;
4924                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4925                 cur += len;
4926         }
4927
4928 extref:
4929         /* Skip if not support EXTENDED_IREF feature */
4930         if (!ext_ref)
4931                 goto out;
4932
4933         btrfs_release_path(&path);
4934         btrfs_init_path(&path);
4935
4936         dir_id = key->offset;
4937         key->type = BTRFS_INODE_EXTREF_KEY;
4938         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4939
4940         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4941         if (ret) {
4942                 ret = INODE_REF_MISSING;
4943                 goto out;
4944         }
4945
4946         node = path.nodes[0];
4947         slot = path.slots[0];
4948
4949         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4950         cur = 0;
4951         total = btrfs_item_size_nr(node, slot);
4952
4953         /* Iterate all entry of INODE_EXTREF */
4954         while (cur < total) {
4955                 ret = INODE_REF_MISSING;
4956
4957                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4958                 ref_index = btrfs_inode_extref_index(node, extref);
4959                 parent = btrfs_inode_extref_parent(node, extref);
4960                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4961                         goto next_extref;
4962
4963                 if (parent != dir_id)
4964                         goto next_extref;
4965
4966                 if (ref_namelen <= BTRFS_NAME_LEN) {
4967                         len = ref_namelen;
4968                 } else {
4969                         len = BTRFS_NAME_LEN;
4970                         warning("root %llu INODE %s[%llu %llu] name too long",
4971                                 root->objectid,
4972                                 key->type == BTRFS_INODE_REF_KEY ?
4973                                         "REF" : "EXTREF",
4974                                 key->objectid, key->offset);
4975                 }
4976                 read_extent_buffer(node, ref_namebuf,
4977                                    (unsigned long)(extref + 1), len);
4978
4979                 if (len != namelen || strncmp(ref_namebuf, name, len))
4980                         goto next_extref;
4981
4982                 *index_ret = ref_index;
4983                 ret = 0;
4984                 goto out;
4985
4986 next_extref:
4987                 len = sizeof(*extref) + ref_namelen;
4988                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4989                 cur += len;
4990
4991         }
4992 out:
4993         btrfs_release_path(&path);
4994         return ret;
4995 }
4996
4997 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4998                                u64 ino, u64 index, const char *namebuf,
4999                                int name_len, u8 filetype, int err)
5000 {
5001         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5002                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5003                       root->objectid, key->objectid, key->offset, namebuf,
5004                       filetype,
5005                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5006         }
5007
5008         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5009                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5010                       root->objectid, key->objectid, index, namebuf, filetype,
5011                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5012         }
5013
5014         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5015                 error(
5016                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5017                       root->objectid, ino, index, namebuf, filetype,
5018                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5019         }
5020
5021         if (err & INODE_REF_MISSING)
5022                 error(
5023                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5024                       root->objectid, ino, key->objectid, namebuf, filetype);
5025
5026 }
5027
5028 /*
5029  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5030  *
5031  * Returns error after repair
5032  */
5033 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5034                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5035                            int err)
5036 {
5037         int ret;
5038
5039         if (err & INODE_ITEM_MISSING) {
5040                 ret = repair_inode_item_missing(root, ino, filetype);
5041                 if (!ret)
5042                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5043         }
5044
5045         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5046                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5047                                             name_len, filetype, err);
5048                 if (!ret) {
5049                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5050                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5051                         err &= ~(INODE_REF_MISSING);
5052                 }
5053         }
5054         return err;
5055 }
5056
5057 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5058                 u64 *size_ret)
5059 {
5060         struct btrfs_key key;
5061         struct btrfs_path path;
5062         u32 len;
5063         struct btrfs_dir_item *di;
5064         int ret;
5065         int cur = 0;
5066         int total = 0;
5067
5068         ASSERT(size_ret);
5069         *size_ret = 0;
5070
5071         key.objectid = ino;
5072         key.type = type;
5073         key.offset = (u64)-1;
5074
5075         btrfs_init_path(&path);
5076         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5077         if (ret < 0) {
5078                 ret = -EIO;
5079                 goto out;
5080         }
5081         /* if found, go to spacial case */
5082         if (ret == 0)
5083                 goto special_case;
5084
5085 loop:
5086         ret = btrfs_previous_item(root, &path, ino, type);
5087
5088         if (ret) {
5089                 ret = 0;
5090                 goto out;
5091         }
5092
5093 special_case:
5094         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5095         cur = 0;
5096         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5097
5098         while (cur < total) {
5099                 len = btrfs_dir_name_len(path.nodes[0], di);
5100                 if (len > BTRFS_NAME_LEN)
5101                         len = BTRFS_NAME_LEN;
5102                 *size_ret += len;
5103
5104                 len += btrfs_dir_data_len(path.nodes[0], di);
5105                 len += sizeof(*di);
5106                 di = (struct btrfs_dir_item *)((char *)di + len);
5107                 cur += len;
5108         }
5109         goto loop;
5110
5111 out:
5112         btrfs_release_path(&path);
5113         return ret;
5114 }
5115
5116 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5117 {
5118         u64 item_size;
5119         u64 index_size;
5120         int ret;
5121
5122         ASSERT(size);
5123         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5124         if (ret)
5125                 goto out;
5126
5127         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5128         if (ret)
5129                 goto out;
5130
5131         *size = item_size + index_size;
5132
5133 out:
5134         if (ret)
5135                 error("failed to count root %llu INODE[%llu] root size",
5136                       root->objectid, ino);
5137         return ret;
5138 }
5139
5140 /*
5141  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5142  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5143  *
5144  * @root:       the root of the fs/file tree
5145  * @key:        the key of the INODE_REF/INODE_EXTREF
5146  * @path:       the path
5147  * @size:       the st_size of the INODE_ITEM
5148  * @ext_ref:    the EXTENDED_IREF feature
5149  *
5150  * Return 0 if no error occurred.
5151  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5152  */
5153 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5154                           struct btrfs_path *path, u64 *size,
5155                           unsigned int ext_ref)
5156 {
5157         struct btrfs_dir_item *di;
5158         struct btrfs_inode_item *ii;
5159         struct btrfs_key key;
5160         struct btrfs_key location;
5161         struct extent_buffer *node;
5162         int slot;
5163         char namebuf[BTRFS_NAME_LEN] = {0};
5164         u32 total;
5165         u32 cur = 0;
5166         u32 len;
5167         u32 name_len;
5168         u32 data_len;
5169         u8 filetype;
5170         u32 mode = 0;
5171         u64 index;
5172         int ret;
5173         int err;
5174         int tmp_err;
5175         int need_research = 0;
5176
5177         /*
5178          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5179          * ignore index check.
5180          */
5181         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5182                 index = di_key->offset;
5183         else
5184                 index = (u64)-1;
5185 begin:
5186         err = 0;
5187         cur = 0;
5188
5189         /* since after repair, path and the dir item may be changed */
5190         if (need_research) {
5191                 need_research = 0;
5192                 err |= DIR_COUNT_AGAIN;
5193                 btrfs_release_path(path);
5194                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5195                 /* the item was deleted, let path point the last checked item */
5196                 if (ret > 0) {
5197                         if (path->slots[0] == 0)
5198                                 btrfs_prev_leaf(root, path);
5199                         else
5200                                 path->slots[0]--;
5201                 }
5202                 if (ret)
5203                         goto out;
5204         }
5205
5206         node = path->nodes[0];
5207         slot = path->slots[0];
5208
5209         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5210         total = btrfs_item_size_nr(node, slot);
5211         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5212
5213         while (cur < total) {
5214                 data_len = btrfs_dir_data_len(node, di);
5215                 tmp_err = 0;
5216                 if (data_len)
5217                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5218                               root->objectid,
5219               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5220                               di_key->objectid, di_key->offset, data_len);
5221
5222                 name_len = btrfs_dir_name_len(node, di);
5223                 if (name_len <= BTRFS_NAME_LEN) {
5224                         len = name_len;
5225                 } else {
5226                         len = BTRFS_NAME_LEN;
5227                         warning("root %llu %s[%llu %llu] name too long",
5228                                 root->objectid,
5229                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5230                                 di_key->objectid, di_key->offset);
5231                 }
5232                 (*size) += name_len;
5233                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5234                                    len);
5235                 filetype = btrfs_dir_type(node, di);
5236
5237                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5238                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5239                         err |= -EIO;
5240                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5241                         root->objectid, di_key->objectid, di_key->offset,
5242                         namebuf, len, filetype, di_key->offset,
5243                         btrfs_name_hash(namebuf, len));
5244                 }
5245
5246                 btrfs_dir_item_key_to_cpu(node, di, &location);
5247                 /* Ignore related ROOT_ITEM check */
5248                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5249                         goto next;
5250
5251                 btrfs_release_path(path);
5252                 /* Check relative INODE_ITEM(existence/filetype) */
5253                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5254                 if (ret) {
5255                         tmp_err |= INODE_ITEM_MISSING;
5256                         goto next;
5257                 }
5258
5259                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5260                                     struct btrfs_inode_item);
5261                 mode = btrfs_inode_mode(path->nodes[0], ii);
5262                 if (imode_to_type(mode) != filetype) {
5263                         tmp_err |= INODE_ITEM_MISMATCH;
5264                         goto next;
5265                 }
5266
5267                 /* Check relative INODE_REF/INODE_EXTREF */
5268                 key.objectid = location.objectid;
5269                 key.type = BTRFS_INODE_REF_KEY;
5270                 key.offset = di_key->objectid;
5271                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5272                                           &index, ext_ref);
5273
5274                 /* check relative INDEX/ITEM */
5275                 key.objectid = di_key->objectid;
5276                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5277                         key.type = BTRFS_DIR_INDEX_KEY;
5278                         key.offset = index;
5279                 } else {
5280                         key.type = BTRFS_DIR_ITEM_KEY;
5281                         key.offset = btrfs_name_hash(namebuf, name_len);
5282                 }
5283
5284                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5285                                          name_len, filetype);
5286                 /* find_dir_item may find index */
5287                 if (key.type == BTRFS_DIR_INDEX_KEY)
5288                         index = key.offset;
5289 next:
5290
5291                 if (tmp_err && repair) {
5292                         ret = repair_dir_item(root, di_key->objectid,
5293                                               location.objectid, index,
5294                                               imode_to_type(mode), namebuf,
5295                                               name_len, tmp_err);
5296                         if (ret != tmp_err) {
5297                                 need_research = 1;
5298                                 goto begin;
5299                         }
5300                 }
5301                 btrfs_release_path(path);
5302                 print_dir_item_err(root, di_key, location.objectid, index,
5303                                    namebuf, name_len, filetype, tmp_err);
5304                 err |= tmp_err;
5305                 len = sizeof(*di) + name_len + data_len;
5306                 di = (struct btrfs_dir_item *)((char *)di + len);
5307                 cur += len;
5308
5309                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5310                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5311                               root->objectid, di_key->objectid,
5312                               di_key->offset);
5313                         break;
5314                 }
5315         }
5316 out:
5317         /* research path */
5318         btrfs_release_path(path);
5319         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5320         if (ret)
5321                 err |= ret > 0 ? -ENOENT : ret;
5322         return err;
5323 }
5324
5325 /*
5326  * Wrapper function of btrfs_punch_hole.
5327  *
5328  * Returns 0 means success.
5329  * Returns not 0 means error.
5330  */
5331 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5332                              u64 len)
5333 {
5334         struct btrfs_trans_handle *trans;
5335         int ret = 0;
5336
5337         trans = btrfs_start_transaction(root, 1);
5338         if (IS_ERR(trans))
5339                 return PTR_ERR(trans);
5340
5341         ret = btrfs_punch_hole(trans, root, ino, start, len);
5342         if (ret)
5343                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5344                       start, len, ino);
5345         else
5346                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5347                        ino);
5348
5349         btrfs_commit_transaction(trans, root);
5350         return ret;
5351 }
5352
5353 /*
5354  * Check file extent datasum/hole, update the size of the file extents,
5355  * check and update the last offset of the file extent.
5356  *
5357  * @root:       the root of fs/file tree.
5358  * @fkey:       the key of the file extent.
5359  * @nodatasum:  INODE_NODATASUM feature.
5360  * @size:       the sum of all EXTENT_DATA items size for this inode.
5361  * @end:        the offset of the last extent.
5362  *
5363  * Return 0 if no error occurred.
5364  */
5365 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5366                              struct extent_buffer *node, int slot,
5367                              unsigned int nodatasum, u64 *size, u64 *end)
5368 {
5369         struct btrfs_file_extent_item *fi;
5370         u64 disk_bytenr;
5371         u64 disk_num_bytes;
5372         u64 extent_num_bytes;
5373         u64 extent_offset;
5374         u64 csum_found;         /* In byte size, sectorsize aligned */
5375         u64 search_start;       /* Logical range start we search for csum */
5376         u64 search_len;         /* Logical range len we search for csum */
5377         unsigned int extent_type;
5378         unsigned int is_hole;
5379         int compressed = 0;
5380         int ret;
5381         int err = 0;
5382
5383         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5384
5385         /* Check inline extent */
5386         extent_type = btrfs_file_extent_type(node, fi);
5387         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5388                 struct btrfs_item *e = btrfs_item_nr(slot);
5389                 u32 item_inline_len;
5390
5391                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5392                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5393                 compressed = btrfs_file_extent_compression(node, fi);
5394                 if (extent_num_bytes == 0) {
5395                         error(
5396                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5397                                 root->objectid, fkey->objectid, fkey->offset);
5398                         err |= FILE_EXTENT_ERROR;
5399                 }
5400                 if (!compressed && extent_num_bytes != item_inline_len) {
5401                         error(
5402                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5403                                 root->objectid, fkey->objectid, fkey->offset,
5404                                 extent_num_bytes, item_inline_len);
5405                         err |= FILE_EXTENT_ERROR;
5406                 }
5407                 *end += extent_num_bytes;
5408                 *size += extent_num_bytes;
5409                 return err;
5410         }
5411
5412         /* Check extent type */
5413         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5414                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5415                 err |= FILE_EXTENT_ERROR;
5416                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5417                       root->objectid, fkey->objectid, fkey->offset);
5418                 return err;
5419         }
5420
5421         /* Check REG_EXTENT/PREALLOC_EXTENT */
5422         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5423         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5424         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5425         extent_offset = btrfs_file_extent_offset(node, fi);
5426         compressed = btrfs_file_extent_compression(node, fi);
5427         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5428
5429         /*
5430          * Check EXTENT_DATA csum
5431          *
5432          * For plain (uncompressed) extent, we should only check the range
5433          * we're referring to, as it's possible that part of prealloc extent
5434          * has been written, and has csum:
5435          *
5436          * |<--- Original large preallocated extent A ---->|
5437          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5438          *      No csum                         Has csum
5439          *
5440          * For compressed extent, we should check the whole range.
5441          */
5442         if (!compressed) {
5443                 search_start = disk_bytenr + extent_offset;
5444                 search_len = extent_num_bytes;
5445         } else {
5446                 search_start = disk_bytenr;
5447                 search_len = disk_num_bytes;
5448         }
5449         ret = count_csum_range(root, search_start, search_len, &csum_found);
5450         if (csum_found > 0 && nodatasum) {
5451                 err |= ODD_CSUM_ITEM;
5452                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5453                       root->objectid, fkey->objectid, fkey->offset);
5454         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5455                    !is_hole && (ret < 0 || csum_found < search_len)) {
5456                 err |= CSUM_ITEM_MISSING;
5457                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5458                       root->objectid, fkey->objectid, fkey->offset,
5459                       csum_found, search_len);
5460         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5461                 err |= ODD_CSUM_ITEM;
5462                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5463                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5464         }
5465
5466         /* Check EXTENT_DATA hole */
5467         if (!no_holes && *end != fkey->offset) {
5468                 if (repair)
5469                         ret = punch_extent_hole(root, fkey->objectid,
5470                                                 *end, fkey->offset - *end);
5471                 if (!repair || ret) {
5472                         err |= FILE_EXTENT_ERROR;
5473                         error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5474                               root->objectid, fkey->objectid, fkey->offset);
5475                 }
5476         }
5477
5478         *end += extent_num_bytes;
5479         if (!is_hole)
5480                 *size += extent_num_bytes;
5481
5482         return err;
5483 }
5484
5485 /*
5486  * Set inode item nbytes to @nbytes
5487  *
5488  * Returns  0     on success
5489  * Returns  != 0  on error
5490  */
5491 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5492                                       struct btrfs_path *path,
5493                                       u64 ino, u64 nbytes)
5494 {
5495         struct btrfs_trans_handle *trans;
5496         struct btrfs_inode_item *ii;
5497         struct btrfs_key key;
5498         struct btrfs_key research_key;
5499         int err = 0;
5500         int ret;
5501
5502         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5503
5504         key.objectid = ino;
5505         key.type = BTRFS_INODE_ITEM_KEY;
5506         key.offset = 0;
5507
5508         trans = btrfs_start_transaction(root, 1);
5509         if (IS_ERR(trans)) {
5510                 ret = PTR_ERR(trans);
5511                 err |= ret;
5512                 goto out;
5513         }
5514
5515         btrfs_release_path(path);
5516         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5517         if (ret > 0)
5518                 ret = -ENOENT;
5519         if (ret) {
5520                 err |= ret;
5521                 goto fail;
5522         }
5523
5524         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5525                             struct btrfs_inode_item);
5526         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5527         btrfs_mark_buffer_dirty(path->nodes[0]);
5528 fail:
5529         btrfs_commit_transaction(trans, root);
5530 out:
5531         if (ret)
5532                 error("failed to set nbytes in inode %llu root %llu",
5533                       ino, root->root_key.objectid);
5534         else
5535                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5536                        root->root_key.objectid, nbytes);
5537
5538         /* research path */
5539         btrfs_release_path(path);
5540         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5541         err |= ret;
5542
5543         return err;
5544 }
5545
5546 /*
5547  * Set directory inode isize to @isize.
5548  *
5549  * Returns 0     on success.
5550  * Returns != 0  on error.
5551  */
5552 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5553                                    struct btrfs_path *path,
5554                                    u64 ino, u64 isize)
5555 {
5556         struct btrfs_trans_handle *trans;
5557         struct btrfs_inode_item *ii;
5558         struct btrfs_key key;
5559         struct btrfs_key research_key;
5560         int ret;
5561         int err = 0;
5562
5563         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5564
5565         key.objectid = ino;
5566         key.type = BTRFS_INODE_ITEM_KEY;
5567         key.offset = 0;
5568
5569         trans = btrfs_start_transaction(root, 1);
5570         if (IS_ERR(trans)) {
5571                 ret = PTR_ERR(trans);
5572                 err |= ret;
5573                 goto out;
5574         }
5575
5576         btrfs_release_path(path);
5577         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5578         if (ret > 0)
5579                 ret = -ENOENT;
5580         if (ret) {
5581                 err |= ret;
5582                 goto fail;
5583         }
5584
5585         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5586                             struct btrfs_inode_item);
5587         btrfs_set_inode_size(path->nodes[0], ii, isize);
5588         btrfs_mark_buffer_dirty(path->nodes[0]);
5589 fail:
5590         btrfs_commit_transaction(trans, root);
5591 out:
5592         if (ret)
5593                 error("failed to set isize in inode %llu root %llu",
5594                       ino, root->root_key.objectid);
5595         else
5596                 printf("Set isize in inode %llu root %llu to %llu\n",
5597                        ino, root->root_key.objectid, isize);
5598
5599         btrfs_release_path(path);
5600         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5601         err |= ret;
5602
5603         return err;
5604 }
5605
5606 /*
5607  * Wrapper function for btrfs_add_orphan_item().
5608  *
5609  * Returns 0     on success.
5610  * Returns != 0  on error.
5611  */
5612 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5613                                            struct btrfs_path *path, u64 ino)
5614 {
5615         struct btrfs_trans_handle *trans;
5616         struct btrfs_key research_key;
5617         int ret;
5618         int err = 0;
5619
5620         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5621
5622         trans = btrfs_start_transaction(root, 1);
5623         if (IS_ERR(trans)) {
5624                 ret = PTR_ERR(trans);
5625                 err |= ret;
5626                 goto out;
5627         }
5628
5629         btrfs_release_path(path);
5630         ret = btrfs_add_orphan_item(trans, root, path, ino);
5631         err |= ret;
5632         btrfs_commit_transaction(trans, root);
5633 out:
5634         if (ret)
5635                 error("failed to add inode %llu as orphan item root %llu",
5636                       ino, root->root_key.objectid);
5637         else
5638                 printf("Added inode %llu as orphan item root %llu\n",
5639                        ino, root->root_key.objectid);
5640
5641         btrfs_release_path(path);
5642         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5643         err |= ret;
5644
5645         return err;
5646 }
5647
5648 /* Set inode_item nlink to @ref_count.
5649  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5650  *
5651  * Returns 0 on success
5652  */
5653 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5654                                       struct btrfs_path *path, u64 ino,
5655                                       const char *name, u32 namelen,
5656                                       u64 ref_count, u8 filetype, u64 *nlink)
5657 {
5658         struct btrfs_trans_handle *trans;
5659         struct btrfs_inode_item *ii;
5660         struct btrfs_key key;
5661         struct btrfs_key old_key;
5662         char namebuf[BTRFS_NAME_LEN] = {0};
5663         int name_len;
5664         int ret;
5665         int ret2;
5666
5667         /* save the key */
5668         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5669
5670         if (name && namelen) {
5671                 ASSERT(namelen <= BTRFS_NAME_LEN);
5672                 memcpy(namebuf, name, namelen);
5673                 name_len = namelen;
5674         } else {
5675                 sprintf(namebuf, "%llu", ino);
5676                 name_len = count_digits(ino);
5677                 printf("Can't find file name for inode %llu, use %s instead\n",
5678                        ino, namebuf);
5679         }
5680
5681         trans = btrfs_start_transaction(root, 1);
5682         if (IS_ERR(trans)) {
5683                 ret = PTR_ERR(trans);
5684                 goto out;
5685         }
5686
5687         btrfs_release_path(path);
5688         /* if refs is 0, put it into lostfound */
5689         if (ref_count == 0) {
5690                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5691                                               name_len, filetype, &ref_count);
5692                 if (ret)
5693                         goto fail;
5694         }
5695
5696         /* reset inode_item's nlink to ref_count */
5697         key.objectid = ino;
5698         key.type = BTRFS_INODE_ITEM_KEY;
5699         key.offset = 0;
5700
5701         btrfs_release_path(path);
5702         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5703         if (ret > 0)
5704                 ret = -ENOENT;
5705         if (ret)
5706                 goto fail;
5707
5708         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5709                             struct btrfs_inode_item);
5710         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5711         btrfs_mark_buffer_dirty(path->nodes[0]);
5712
5713         if (nlink)
5714                 *nlink = ref_count;
5715 fail:
5716         btrfs_commit_transaction(trans, root);
5717 out:
5718         if (ret)
5719                 error(
5720         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5721                        root->objectid, ino, namebuf, filetype);
5722         else
5723                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5724                        root->objectid, ino, namebuf, filetype);
5725
5726         /* research */
5727         btrfs_release_path(path);
5728         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5729         if (ret2 < 0)
5730                 return ret |= ret2;
5731         return ret;
5732 }
5733
5734 /*
5735  * Check INODE_ITEM and related ITEMs (the same inode number)
5736  * 1. check link count
5737  * 2. check inode ref/extref
5738  * 3. check dir item/index
5739  *
5740  * @ext_ref:    the EXTENDED_IREF feature
5741  *
5742  * Return 0 if no error occurred.
5743  * Return >0 for error or hit the traversal is done(by error bitmap)
5744  */
5745 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5746                             unsigned int ext_ref)
5747 {
5748         struct extent_buffer *node;
5749         struct btrfs_inode_item *ii;
5750         struct btrfs_key key;
5751         struct btrfs_key last_key;
5752         u64 inode_id;
5753         u32 mode;
5754         u64 nlink;
5755         u64 nbytes;
5756         u64 isize;
5757         u64 size = 0;
5758         u64 refs = 0;
5759         u64 extent_end = 0;
5760         u64 extent_size = 0;
5761         unsigned int dir;
5762         unsigned int nodatasum;
5763         int slot;
5764         int ret;
5765         int err = 0;
5766         char namebuf[BTRFS_NAME_LEN] = {0};
5767         u32 name_len = 0;
5768
5769         node = path->nodes[0];
5770         slot = path->slots[0];
5771
5772         btrfs_item_key_to_cpu(node, &key, slot);
5773         inode_id = key.objectid;
5774
5775         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5776                 ret = btrfs_next_item(root, path);
5777                 if (ret > 0)
5778                         err |= LAST_ITEM;
5779                 return err;
5780         }
5781
5782         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5783         isize = btrfs_inode_size(node, ii);
5784         nbytes = btrfs_inode_nbytes(node, ii);
5785         mode = btrfs_inode_mode(node, ii);
5786         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5787         nlink = btrfs_inode_nlink(node, ii);
5788         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5789
5790         while (1) {
5791                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5792                 ret = btrfs_next_item(root, path);
5793                 if (ret < 0) {
5794                         /* out will fill 'err' rusing current statistics */
5795                         goto out;
5796                 } else if (ret > 0) {
5797                         err |= LAST_ITEM;
5798                         goto out;
5799                 }
5800
5801                 node = path->nodes[0];
5802                 slot = path->slots[0];
5803                 btrfs_item_key_to_cpu(node, &key, slot);
5804                 if (key.objectid != inode_id)
5805                         goto out;
5806
5807                 switch (key.type) {
5808                 case BTRFS_INODE_REF_KEY:
5809                         ret = check_inode_ref(root, &key, path, namebuf,
5810                                               &name_len, &refs, mode);
5811                         err |= ret;
5812                         break;
5813                 case BTRFS_INODE_EXTREF_KEY:
5814                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5815                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5816                                         root->objectid, key.objectid,
5817                                         key.offset);
5818                         ret = check_inode_extref(root, &key, node, slot, &refs,
5819                                                  mode);
5820                         err |= ret;
5821                         break;
5822                 case BTRFS_DIR_ITEM_KEY:
5823                 case BTRFS_DIR_INDEX_KEY:
5824                         if (!dir) {
5825                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5826                                         root->objectid, inode_id,
5827                                         imode_to_type(mode), key.objectid,
5828                                         key.offset);
5829                         }
5830                         ret = check_dir_item(root, &key, path, &size, ext_ref);
5831                         err |= ret;
5832                         break;
5833                 case BTRFS_EXTENT_DATA_KEY:
5834                         if (dir) {
5835                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5836                                         root->objectid, inode_id, key.objectid,
5837                                         key.offset);
5838                         }
5839                         ret = check_file_extent(root, &key, node, slot,
5840                                                 nodatasum, &extent_size,
5841                                                 &extent_end);
5842                         err |= ret;
5843                         break;
5844                 case BTRFS_XATTR_ITEM_KEY:
5845                         break;
5846                 default:
5847                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5848                               key.objectid, key.type, key.offset);
5849                 }
5850         }
5851
5852 out:
5853         if (err & LAST_ITEM) {
5854                 btrfs_release_path(path);
5855                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5856                 if (ret)
5857                         return err;
5858         }
5859
5860         /* verify INODE_ITEM nlink/isize/nbytes */
5861         if (dir) {
5862                 if (repair && (err & DIR_COUNT_AGAIN)) {
5863                         err &= ~DIR_COUNT_AGAIN;
5864                         count_dir_isize(root, inode_id, &size);
5865                 }
5866
5867                 if ((nlink != 1 || refs != 1) && repair) {
5868                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5869                                 namebuf, name_len, refs, imode_to_type(mode),
5870                                 &nlink);
5871                 }
5872
5873                 if (nlink != 1) {
5874                         err |= LINK_COUNT_ERROR;
5875                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5876                               root->objectid, inode_id, nlink);
5877                 }
5878
5879                 /*
5880                  * Just a warning, as dir inode nbytes is just an
5881                  * instructive value.
5882                  */
5883                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5884                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5885                                 root->objectid, inode_id,
5886                                 root->fs_info->nodesize);
5887                 }
5888
5889                 if (isize != size) {
5890                         if (repair)
5891                                 ret = repair_dir_isize_lowmem(root, path,
5892                                                               inode_id, size);
5893                         if (!repair || ret) {
5894                                 err |= ISIZE_ERROR;
5895                                 error(
5896                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5897                                       root->objectid, inode_id, isize, size);
5898                         }
5899                 }
5900         } else {
5901                 if (nlink != refs) {
5902                         if (repair)
5903                                 ret = repair_inode_nlinks_lowmem(root, path,
5904                                          inode_id, namebuf, name_len, refs,
5905                                          imode_to_type(mode), &nlink);
5906                         if (!repair || ret) {
5907                                 err |= LINK_COUNT_ERROR;
5908                                 error(
5909                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5910                                       root->objectid, inode_id, nlink, refs);
5911                         }
5912                 } else if (!nlink) {
5913                         if (repair)
5914                                 ret = repair_inode_orphan_item_lowmem(root,
5915                                                               path, inode_id);
5916                         if (!repair || ret) {
5917                                 err |= ORPHAN_ITEM;
5918                                 error("root %llu INODE[%llu] is orphan item",
5919                                       root->objectid, inode_id);
5920                         }
5921                 }
5922
5923                 if (!nbytes && !no_holes && extent_end < isize) {
5924                         if (repair)
5925                                 ret = punch_extent_hole(root, inode_id,
5926                                                 extent_end, isize - extent_end);
5927                         if (!repair || ret) {
5928                                 err |= NBYTES_ERROR;
5929                                 error(
5930         "root %llu INODE[%llu] size %llu should have a file extent hole",
5931                                       root->objectid, inode_id, isize);
5932                         }
5933                 }
5934
5935                 if (nbytes != extent_size) {
5936                         if (repair)
5937                                 ret = repair_inode_nbytes_lowmem(root, path,
5938                                                          inode_id, extent_size);
5939                         if (!repair || ret) {
5940                                 err |= NBYTES_ERROR;
5941                                 error(
5942         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5943                                       root->objectid, inode_id, nbytes,
5944                                       extent_size);
5945                         }
5946                 }
5947         }
5948
5949         if (err & LAST_ITEM)
5950                 btrfs_next_item(root, path);
5951         return err;
5952 }
5953
5954 /*
5955  * Insert the missing inode item and inode ref.
5956  *
5957  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
5958  * Root dir should be handled specially because root dir is the root of fs.
5959  *
5960  * returns err (>0 or 0) after repair
5961  */
5962 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5963 {
5964         struct btrfs_trans_handle *trans;
5965         struct btrfs_key key;
5966         struct btrfs_path path;
5967         int filetype = BTRFS_FT_DIR;
5968         int ret = 0;
5969
5970         btrfs_init_path(&path);
5971
5972         if (err & INODE_REF_MISSING) {
5973                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5974                 key.type = BTRFS_INODE_REF_KEY;
5975                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5976
5977                 trans = btrfs_start_transaction(root, 1);
5978                 if (IS_ERR(trans)) {
5979                         ret = PTR_ERR(trans);
5980                         goto out;
5981                 }
5982
5983                 btrfs_release_path(&path);
5984                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5985                 if (ret)
5986                         goto trans_fail;
5987
5988                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5989                                              BTRFS_FIRST_FREE_OBJECTID,
5990                                              BTRFS_FIRST_FREE_OBJECTID, 0);
5991                 if (ret)
5992                         goto trans_fail;
5993
5994                 printf("Add INODE_REF[%llu %llu] name %s\n",
5995                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5996                        "..");
5997                 err &= ~INODE_REF_MISSING;
5998 trans_fail:
5999                 if (ret)
6000                         error("fail to insert first inode's ref");
6001                 btrfs_commit_transaction(trans, root);
6002         }
6003
6004         if (err & INODE_ITEM_MISSING) {
6005                 ret = repair_inode_item_missing(root,
6006                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
6007                 if (ret)
6008                         goto out;
6009                 err &= ~INODE_ITEM_MISSING;
6010         }
6011 out:
6012         if (ret)
6013                 error("fail to repair first inode");
6014         btrfs_release_path(&path);
6015         return err;
6016 }
6017
6018 /*
6019  * check first root dir's inode_item and inode_ref
6020  *
6021  * returns 0 means no error
6022  * returns >0 means error
6023  * returns <0 means fatal error
6024  */
6025 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6026 {
6027         struct btrfs_path path;
6028         struct btrfs_key key;
6029         struct btrfs_inode_item *ii;
6030         u64 index;
6031         u32 mode;
6032         int err = 0;
6033         int ret;
6034
6035         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6036         key.type = BTRFS_INODE_ITEM_KEY;
6037         key.offset = 0;
6038
6039         /* For root being dropped, we don't need to check first inode */
6040         if (btrfs_root_refs(&root->root_item) == 0 &&
6041             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6042             BTRFS_FIRST_FREE_OBJECTID)
6043                 return 0;
6044
6045         btrfs_init_path(&path);
6046         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6047         if (ret < 0)
6048                 goto out;
6049         if (ret > 0) {
6050                 ret = 0;
6051                 err |= INODE_ITEM_MISSING;
6052         } else {
6053                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6054                                     struct btrfs_inode_item);
6055                 mode = btrfs_inode_mode(path.nodes[0], ii);
6056                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6057                         err |= INODE_ITEM_MISMATCH;
6058         }
6059
6060         /* lookup first inode ref */
6061         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6062         key.type = BTRFS_INODE_REF_KEY;
6063         /* special index value */
6064         index = 0;
6065
6066         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6067         if (ret < 0)
6068                 goto out;
6069         err |= ret;
6070
6071 out:
6072         btrfs_release_path(&path);
6073
6074         if (err && repair)
6075                 err = repair_fs_first_inode(root, err);
6076
6077         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6078                 error("root dir INODE_ITEM is %s",
6079                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6080         if (err & INODE_REF_MISSING)
6081                 error("root dir INODE_REF is missing");
6082
6083         return ret < 0 ? ret : err;
6084 }
6085
6086 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6087                                                 u64 parent, u64 root)
6088 {
6089         struct rb_node *node;
6090         struct tree_backref *back = NULL;
6091         struct tree_backref match = {
6092                 .node = {
6093                         .is_data = 0,
6094                 },
6095         };
6096
6097         if (parent) {
6098                 match.parent = parent;
6099                 match.node.full_backref = 1;
6100         } else {
6101                 match.root = root;
6102         }
6103
6104         node = rb_search(&rec->backref_tree, &match.node.node,
6105                          (rb_compare_keys)compare_extent_backref, NULL);
6106         if (node)
6107                 back = to_tree_backref(rb_node_to_extent_backref(node));
6108
6109         return back;
6110 }
6111
6112 static struct data_backref *find_data_backref(struct extent_record *rec,
6113                                                 u64 parent, u64 root,
6114                                                 u64 owner, u64 offset,
6115                                                 int found_ref,
6116                                                 u64 disk_bytenr, u64 bytes)
6117 {
6118         struct rb_node *node;
6119         struct data_backref *back = NULL;
6120         struct data_backref match = {
6121                 .node = {
6122                         .is_data = 1,
6123                 },
6124                 .owner = owner,
6125                 .offset = offset,
6126                 .bytes = bytes,
6127                 .found_ref = found_ref,
6128                 .disk_bytenr = disk_bytenr,
6129         };
6130
6131         if (parent) {
6132                 match.parent = parent;
6133                 match.node.full_backref = 1;
6134         } else {
6135                 match.root = root;
6136         }
6137
6138         node = rb_search(&rec->backref_tree, &match.node.node,
6139                          (rb_compare_keys)compare_extent_backref, NULL);
6140         if (node)
6141                 back = to_data_backref(rb_node_to_extent_backref(node));
6142
6143         return back;
6144 }
6145 /*
6146  * Iterate all item on the tree and call check_inode_item() to check.
6147  *
6148  * @root:       the root of the tree to be checked.
6149  * @ext_ref:    the EXTENDED_IREF feature
6150  *
6151  * Return 0 if no error found.
6152  * Return <0 for error.
6153  */
6154 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6155 {
6156         struct btrfs_path path;
6157         struct node_refs nrefs;
6158         struct btrfs_root_item *root_item = &root->root_item;
6159         int ret;
6160         int level;
6161         int err = 0;
6162
6163         /*
6164          * We need to manually check the first inode item(256)
6165          * As the following traversal function will only start from
6166          * the first inode item in the leaf, if inode item(256) is missing
6167          * we will just skip it forever.
6168          */
6169         ret = check_fs_first_inode(root, ext_ref);
6170         if (ret < 0)
6171                 return ret;
6172         err |= !!ret;
6173
6174         memset(&nrefs, 0, sizeof(nrefs));
6175         level = btrfs_header_level(root->node);
6176         btrfs_init_path(&path);
6177
6178         if (btrfs_root_refs(root_item) > 0 ||
6179             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6180                 path.nodes[level] = root->node;
6181                 path.slots[level] = 0;
6182                 extent_buffer_get(root->node);
6183         } else {
6184                 struct btrfs_key key;
6185
6186                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6187                 level = root_item->drop_level;
6188                 path.lowest_level = level;
6189                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6190                 if (ret < 0)
6191                         goto out;
6192                 ret = 0;
6193         }
6194
6195         while (1) {
6196                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
6197                 err |= !!ret;
6198
6199                 /* if ret is negative, walk shall stop */
6200                 if (ret < 0) {
6201                         ret = err;
6202                         break;
6203                 }
6204
6205                 ret = walk_up_tree_v2(root, &path, &level);
6206                 if (ret != 0) {
6207                         /* Normal exit, reset ret to err */
6208                         ret = err;
6209                         break;
6210                 }
6211         }
6212
6213 out:
6214         btrfs_release_path(&path);
6215         return ret;
6216 }
6217
6218 /*
6219  * Find the relative ref for root_ref and root_backref.
6220  *
6221  * @root:       the root of the root tree.
6222  * @ref_key:    the key of the root ref.
6223  *
6224  * Return 0 if no error occurred.
6225  */
6226 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6227                           struct extent_buffer *node, int slot)
6228 {
6229         struct btrfs_path path;
6230         struct btrfs_key key;
6231         struct btrfs_root_ref *ref;
6232         struct btrfs_root_ref *backref;
6233         char ref_name[BTRFS_NAME_LEN] = {0};
6234         char backref_name[BTRFS_NAME_LEN] = {0};
6235         u64 ref_dirid;
6236         u64 ref_seq;
6237         u32 ref_namelen;
6238         u64 backref_dirid;
6239         u64 backref_seq;
6240         u32 backref_namelen;
6241         u32 len;
6242         int ret;
6243         int err = 0;
6244
6245         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6246         ref_dirid = btrfs_root_ref_dirid(node, ref);
6247         ref_seq = btrfs_root_ref_sequence(node, ref);
6248         ref_namelen = btrfs_root_ref_name_len(node, ref);
6249
6250         if (ref_namelen <= BTRFS_NAME_LEN) {
6251                 len = ref_namelen;
6252         } else {
6253                 len = BTRFS_NAME_LEN;
6254                 warning("%s[%llu %llu] ref_name too long",
6255                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6256                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6257                         ref_key->offset);
6258         }
6259         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6260
6261         /* Find relative root_ref */
6262         key.objectid = ref_key->offset;
6263         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6264         key.offset = ref_key->objectid;
6265
6266         btrfs_init_path(&path);
6267         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6268         if (ret) {
6269                 err |= ROOT_REF_MISSING;
6270                 error("%s[%llu %llu] couldn't find relative ref",
6271                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6272                       "ROOT_REF" : "ROOT_BACKREF",
6273                       ref_key->objectid, ref_key->offset);
6274                 goto out;
6275         }
6276
6277         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6278                                  struct btrfs_root_ref);
6279         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6280         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6281         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6282
6283         if (backref_namelen <= BTRFS_NAME_LEN) {
6284                 len = backref_namelen;
6285         } else {
6286                 len = BTRFS_NAME_LEN;
6287                 warning("%s[%llu %llu] ref_name too long",
6288                         key.type == BTRFS_ROOT_REF_KEY ?
6289                         "ROOT_REF" : "ROOT_BACKREF",
6290                         key.objectid, key.offset);
6291         }
6292         read_extent_buffer(path.nodes[0], backref_name,
6293                            (unsigned long)(backref + 1), len);
6294
6295         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6296             ref_namelen != backref_namelen ||
6297             strncmp(ref_name, backref_name, len)) {
6298                 err |= ROOT_REF_MISMATCH;
6299                 error("%s[%llu %llu] mismatch relative ref",
6300                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6301                       "ROOT_REF" : "ROOT_BACKREF",
6302                       ref_key->objectid, ref_key->offset);
6303         }
6304 out:
6305         btrfs_release_path(&path);
6306         return err;
6307 }
6308
6309 /*
6310  * Check all fs/file tree in low_memory mode.
6311  *
6312  * 1. for fs tree root item, call check_fs_root_v2()
6313  * 2. for fs tree root ref/backref, call check_root_ref()
6314  *
6315  * Return 0 if no error occurred.
6316  */
6317 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6318 {
6319         struct btrfs_root *tree_root = fs_info->tree_root;
6320         struct btrfs_root *cur_root = NULL;
6321         struct btrfs_path path;
6322         struct btrfs_key key;
6323         struct extent_buffer *node;
6324         unsigned int ext_ref;
6325         int slot;
6326         int ret;
6327         int err = 0;
6328
6329         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6330
6331         btrfs_init_path(&path);
6332         key.objectid = BTRFS_FS_TREE_OBJECTID;
6333         key.offset = 0;
6334         key.type = BTRFS_ROOT_ITEM_KEY;
6335
6336         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6337         if (ret < 0) {
6338                 err = ret;
6339                 goto out;
6340         } else if (ret > 0) {
6341                 err = -ENOENT;
6342                 goto out;
6343         }
6344
6345         while (1) {
6346                 node = path.nodes[0];
6347                 slot = path.slots[0];
6348                 btrfs_item_key_to_cpu(node, &key, slot);
6349                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6350                         goto out;
6351                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6352                     fs_root_objectid(key.objectid)) {
6353                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6354                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6355                                                                        &key);
6356                         } else {
6357                                 key.offset = (u64)-1;
6358                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6359                         }
6360
6361                         if (IS_ERR(cur_root)) {
6362                                 error("Fail to read fs/subvol tree: %lld",
6363                                       key.objectid);
6364                                 err = -EIO;
6365                                 goto next;
6366                         }
6367
6368                         ret = check_fs_root_v2(cur_root, ext_ref);
6369                         err |= ret;
6370
6371                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6372                                 btrfs_free_fs_root(cur_root);
6373                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6374                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6375                         ret = check_root_ref(tree_root, &key, node, slot);
6376                         err |= ret;
6377                 }
6378 next:
6379                 ret = btrfs_next_item(tree_root, &path);
6380                 if (ret > 0)
6381                         goto out;
6382                 if (ret < 0) {
6383                         err = ret;
6384                         goto out;
6385                 }
6386         }
6387
6388 out:
6389         btrfs_release_path(&path);
6390         return err;
6391 }
6392
6393 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6394                           struct cache_tree *root_cache)
6395 {
6396         int ret;
6397
6398         if (!ctx.progress_enabled)
6399                 fprintf(stderr, "checking fs roots\n");
6400         if (check_mode == CHECK_MODE_LOWMEM)
6401                 ret = check_fs_roots_v2(fs_info);
6402         else
6403                 ret = check_fs_roots(fs_info, root_cache);
6404
6405         return ret;
6406 }
6407
6408 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6409 {
6410         struct extent_backref *back, *tmp;
6411         struct tree_backref *tback;
6412         struct data_backref *dback;
6413         u64 found = 0;
6414         int err = 0;
6415
6416         rbtree_postorder_for_each_entry_safe(back, tmp,
6417                                              &rec->backref_tree, node) {
6418                 if (!back->found_extent_tree) {
6419                         err = 1;
6420                         if (!print_errs)
6421                                 goto out;
6422                         if (back->is_data) {
6423                                 dback = to_data_backref(back);
6424                                 fprintf(stderr, "Data backref %llu %s %llu"
6425                                         " owner %llu offset %llu num_refs %lu"
6426                                         " not found in extent tree\n",
6427                                         (unsigned long long)rec->start,
6428                                         back->full_backref ?
6429                                         "parent" : "root",
6430                                         back->full_backref ?
6431                                         (unsigned long long)dback->parent:
6432                                         (unsigned long long)dback->root,
6433                                         (unsigned long long)dback->owner,
6434                                         (unsigned long long)dback->offset,
6435                                         (unsigned long)dback->num_refs);
6436                         } else {
6437                                 tback = to_tree_backref(back);
6438                                 fprintf(stderr, "Tree backref %llu parent %llu"
6439                                         " root %llu not found in extent tree\n",
6440                                         (unsigned long long)rec->start,
6441                                         (unsigned long long)tback->parent,
6442                                         (unsigned long long)tback->root);
6443                         }
6444                 }
6445                 if (!back->is_data && !back->found_ref) {
6446                         err = 1;
6447                         if (!print_errs)
6448                                 goto out;
6449                         tback = to_tree_backref(back);
6450                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6451                                 (unsigned long long)rec->start,
6452                                 back->full_backref ? "parent" : "root",
6453                                 back->full_backref ?
6454                                 (unsigned long long)tback->parent :
6455                                 (unsigned long long)tback->root, back);
6456                 }
6457                 if (back->is_data) {
6458                         dback = to_data_backref(back);
6459                         if (dback->found_ref != dback->num_refs) {
6460                                 err = 1;
6461                                 if (!print_errs)
6462                                         goto out;
6463                                 fprintf(stderr, "Incorrect local backref count"
6464                                         " on %llu %s %llu owner %llu"
6465                                         " offset %llu found %u wanted %u back %p\n",
6466                                         (unsigned long long)rec->start,
6467                                         back->full_backref ?
6468                                         "parent" : "root",
6469                                         back->full_backref ?
6470                                         (unsigned long long)dback->parent:
6471                                         (unsigned long long)dback->root,
6472                                         (unsigned long long)dback->owner,
6473                                         (unsigned long long)dback->offset,
6474                                         dback->found_ref, dback->num_refs, back);
6475                         }
6476                         if (dback->disk_bytenr != rec->start) {
6477                                 err = 1;
6478                                 if (!print_errs)
6479                                         goto out;
6480                                 fprintf(stderr, "Backref disk bytenr does not"
6481                                         " match extent record, bytenr=%llu, "
6482                                         "ref bytenr=%llu\n",
6483                                         (unsigned long long)rec->start,
6484                                         (unsigned long long)dback->disk_bytenr);
6485                         }
6486
6487                         if (dback->bytes != rec->nr) {
6488                                 err = 1;
6489                                 if (!print_errs)
6490                                         goto out;
6491                                 fprintf(stderr, "Backref bytes do not match "
6492                                         "extent backref, bytenr=%llu, ref "
6493                                         "bytes=%llu, backref bytes=%llu\n",
6494                                         (unsigned long long)rec->start,
6495                                         (unsigned long long)rec->nr,
6496                                         (unsigned long long)dback->bytes);
6497                         }
6498                 }
6499                 if (!back->is_data) {
6500                         found += 1;
6501                 } else {
6502                         dback = to_data_backref(back);
6503                         found += dback->found_ref;
6504                 }
6505         }
6506         if (found != rec->refs) {
6507                 err = 1;
6508                 if (!print_errs)
6509                         goto out;
6510                 fprintf(stderr, "Incorrect global backref count "
6511                         "on %llu found %llu wanted %llu\n",
6512                         (unsigned long long)rec->start,
6513                         (unsigned long long)found,
6514                         (unsigned long long)rec->refs);
6515         }
6516 out:
6517         return err;
6518 }
6519
/* rb_free_nodes() callback: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
6526
6527 static void free_all_extent_backrefs(struct extent_record *rec)
6528 {
6529         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6530 }
6531
6532 static void free_extent_record_cache(struct cache_tree *extent_cache)
6533 {
6534         struct cache_extent *cache;
6535         struct extent_record *rec;
6536
6537         while (1) {
6538                 cache = first_cache_extent(extent_cache);
6539                 if (!cache)
6540                         break;
6541                 rec = container_of(cache, struct extent_record, cache);
6542                 remove_cache_extent(extent_cache, cache);
6543                 free_all_extent_backrefs(rec);
6544                 free(rec);
6545         }
6546 }
6547
6548 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6549                                  struct extent_record *rec)
6550 {
6551         if (rec->content_checked && rec->owner_ref_checked &&
6552             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6553             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6554             !rec->bad_full_backref && !rec->crossing_stripes &&
6555             !rec->wrong_chunk_type) {
6556                 remove_cache_extent(extent_cache, &rec->cache);
6557                 free_all_extent_backrefs(rec);
6558                 list_del_init(&rec->list);
6559                 free(rec);
6560         }
6561         return 0;
6562 }
6563
6564 static int check_owner_ref(struct btrfs_root *root,
6565                             struct extent_record *rec,
6566                             struct extent_buffer *buf)
6567 {
6568         struct extent_backref *node, *tmp;
6569         struct tree_backref *back;
6570         struct btrfs_root *ref_root;
6571         struct btrfs_key key;
6572         struct btrfs_path path;
6573         struct extent_buffer *parent;
6574         int level;
6575         int found = 0;
6576         int ret;
6577
6578         rbtree_postorder_for_each_entry_safe(node, tmp,
6579                                              &rec->backref_tree, node) {
6580                 if (node->is_data)
6581                         continue;
6582                 if (!node->found_ref)
6583                         continue;
6584                 if (node->full_backref)
6585                         continue;
6586                 back = to_tree_backref(node);
6587                 if (btrfs_header_owner(buf) == back->root)
6588                         return 0;
6589         }
6590         BUG_ON(rec->is_root);
6591
6592         /* try to find the block by search corresponding fs tree */
6593         key.objectid = btrfs_header_owner(buf);
6594         key.type = BTRFS_ROOT_ITEM_KEY;
6595         key.offset = (u64)-1;
6596
6597         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6598         if (IS_ERR(ref_root))
6599                 return 1;
6600
6601         level = btrfs_header_level(buf);
6602         if (level == 0)
6603                 btrfs_item_key_to_cpu(buf, &key, 0);
6604         else
6605                 btrfs_node_key_to_cpu(buf, &key, 0);
6606
6607         btrfs_init_path(&path);
6608         path.lowest_level = level + 1;
6609         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6610         if (ret < 0)
6611                 return 0;
6612
6613         parent = path.nodes[level + 1];
6614         if (parent && buf->start == btrfs_node_blockptr(parent,
6615                                                         path.slots[level + 1]))
6616                 found = 1;
6617
6618         btrfs_release_path(&path);
6619         return found ? 0 : 1;
6620 }
6621
6622 static int is_extent_tree_record(struct extent_record *rec)
6623 {
6624         struct extent_backref *node, *tmp;
6625         struct tree_backref *back;
6626         int is_extent = 0;
6627
6628         rbtree_postorder_for_each_entry_safe(node, tmp,
6629                                              &rec->backref_tree, node) {
6630                 if (node->is_data)
6631                         return 0;
6632                 back = to_tree_backref(node);
6633                 if (node->full_backref)
6634                         return 0;
6635                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6636                         is_extent = 1;
6637         }
6638         return is_extent;
6639 }
6640
6641
6642 static int record_bad_block_io(struct btrfs_fs_info *info,
6643                                struct cache_tree *extent_cache,
6644                                u64 start, u64 len)
6645 {
6646         struct extent_record *rec;
6647         struct cache_extent *cache;
6648         struct btrfs_key key;
6649
6650         cache = lookup_cache_extent(extent_cache, start, len);
6651         if (!cache)
6652                 return 0;
6653
6654         rec = container_of(cache, struct extent_record, cache);
6655         if (!is_extent_tree_record(rec))
6656                 return 0;
6657
6658         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6659         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6660 }
6661
6662 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6663                        struct extent_buffer *buf, int slot)
6664 {
6665         if (btrfs_header_level(buf)) {
6666                 struct btrfs_key_ptr ptr1, ptr2;
6667
6668                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6669                                    sizeof(struct btrfs_key_ptr));
6670                 read_extent_buffer(buf, &ptr2,
6671                                    btrfs_node_key_ptr_offset(slot + 1),
6672                                    sizeof(struct btrfs_key_ptr));
6673                 write_extent_buffer(buf, &ptr1,
6674                                     btrfs_node_key_ptr_offset(slot + 1),
6675                                     sizeof(struct btrfs_key_ptr));
6676                 write_extent_buffer(buf, &ptr2,
6677                                     btrfs_node_key_ptr_offset(slot),
6678                                     sizeof(struct btrfs_key_ptr));
6679                 if (slot == 0) {
6680                         struct btrfs_disk_key key;
6681                         btrfs_node_key(buf, &key, 0);
6682                         btrfs_fixup_low_keys(root, path, &key,
6683                                              btrfs_header_level(buf) + 1);
6684                 }
6685         } else {
6686                 struct btrfs_item *item1, *item2;
6687                 struct btrfs_key k1, k2;
6688                 char *item1_data, *item2_data;
6689                 u32 item1_offset, item2_offset, item1_size, item2_size;
6690
6691                 item1 = btrfs_item_nr(slot);
6692                 item2 = btrfs_item_nr(slot + 1);
6693                 btrfs_item_key_to_cpu(buf, &k1, slot);
6694                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6695                 item1_offset = btrfs_item_offset(buf, item1);
6696                 item2_offset = btrfs_item_offset(buf, item2);
6697                 item1_size = btrfs_item_size(buf, item1);
6698                 item2_size = btrfs_item_size(buf, item2);
6699
6700                 item1_data = malloc(item1_size);
6701                 if (!item1_data)
6702                         return -ENOMEM;
6703                 item2_data = malloc(item2_size);
6704                 if (!item2_data) {
6705                         free(item1_data);
6706                         return -ENOMEM;
6707                 }
6708
6709                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6710                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6711
6712                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6713                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6714                 free(item1_data);
6715                 free(item2_data);
6716
6717                 btrfs_set_item_offset(buf, item1, item2_offset);
6718                 btrfs_set_item_offset(buf, item2, item1_offset);
6719                 btrfs_set_item_size(buf, item1, item2_size);
6720                 btrfs_set_item_size(buf, item2, item1_size);
6721
6722                 path->slots[0] = slot;
6723                 btrfs_set_item_key_unsafe(root, path, &k2);
6724                 path->slots[0] = slot + 1;
6725                 btrfs_set_item_key_unsafe(root, path, &k1);
6726         }
6727         return 0;
6728 }
6729
6730 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6731 {
6732         struct extent_buffer *buf;
6733         struct btrfs_key k1, k2;
6734         int i;
6735         int level = path->lowest_level;
6736         int ret = -EIO;
6737
6738         buf = path->nodes[level];
6739         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6740                 if (level) {
6741                         btrfs_node_key_to_cpu(buf, &k1, i);
6742                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6743                 } else {
6744                         btrfs_item_key_to_cpu(buf, &k1, i);
6745                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6746                 }
6747                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6748                         continue;
6749                 ret = swap_values(root, path, buf, i);
6750                 if (ret)
6751                         break;
6752                 btrfs_mark_buffer_dirty(buf);
6753                 i = 0;
6754         }
6755         return ret;
6756 }
6757
6758 static int delete_bogus_item(struct btrfs_root *root,
6759                              struct btrfs_path *path,
6760                              struct extent_buffer *buf, int slot)
6761 {
6762         struct btrfs_key key;
6763         int nritems = btrfs_header_nritems(buf);
6764
6765         btrfs_item_key_to_cpu(buf, &key, slot);
6766
6767         /* These are all the keys we can deal with missing. */
6768         if (key.type != BTRFS_DIR_INDEX_KEY &&
6769             key.type != BTRFS_EXTENT_ITEM_KEY &&
6770             key.type != BTRFS_METADATA_ITEM_KEY &&
6771             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6772             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6773                 return -1;
6774
6775         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6776                (unsigned long long)key.objectid, key.type,
6777                (unsigned long long)key.offset, slot, buf->start);
6778         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6779                               btrfs_item_nr_offset(slot + 1),
6780                               sizeof(struct btrfs_item) *
6781                               (nritems - slot - 1));
6782         btrfs_set_header_nritems(buf, nritems - 1);
6783         if (slot == 0) {
6784                 struct btrfs_disk_key disk_key;
6785
6786                 btrfs_item_key(buf, &disk_key, 0);
6787                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6788         }
6789         btrfs_mark_buffer_dirty(buf);
6790         return 0;
6791 }
6792
6793 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6794 {
6795         struct extent_buffer *buf;
6796         int i;
6797         int ret = 0;
6798
6799         /* We should only get this for leaves */
6800         BUG_ON(path->lowest_level);
6801         buf = path->nodes[0];
6802 again:
6803         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6804                 unsigned int shift = 0, offset;
6805
6806                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6807                     BTRFS_LEAF_DATA_SIZE(root)) {
6808                         if (btrfs_item_end_nr(buf, i) >
6809                             BTRFS_LEAF_DATA_SIZE(root)) {
6810                                 ret = delete_bogus_item(root, path, buf, i);
6811                                 if (!ret)
6812                                         goto again;
6813                                 fprintf(stderr, "item is off the end of the "
6814                                         "leaf, can't fix\n");
6815                                 ret = -EIO;
6816                                 break;
6817                         }
6818                         shift = BTRFS_LEAF_DATA_SIZE(root) -
6819                                 btrfs_item_end_nr(buf, i);
6820                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6821                            btrfs_item_offset_nr(buf, i - 1)) {
6822                         if (btrfs_item_end_nr(buf, i) >
6823                             btrfs_item_offset_nr(buf, i - 1)) {
6824                                 ret = delete_bogus_item(root, path, buf, i);
6825                                 if (!ret)
6826                                         goto again;
6827                                 fprintf(stderr, "items overlap, can't fix\n");
6828                                 ret = -EIO;
6829                                 break;
6830                         }
6831                         shift = btrfs_item_offset_nr(buf, i - 1) -
6832                                 btrfs_item_end_nr(buf, i);
6833                 }
6834                 if (!shift)
6835                         continue;
6836
6837                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6838                        i, shift, (unsigned long long)buf->start);
6839                 offset = btrfs_item_offset_nr(buf, i);
6840                 memmove_extent_buffer(buf,
6841                                       btrfs_leaf_data(buf) + offset + shift,
6842                                       btrfs_leaf_data(buf) + offset,
6843                                       btrfs_item_size_nr(buf, i));
6844                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6845                                       offset + shift);
6846                 btrfs_mark_buffer_dirty(buf);
6847         }
6848
6849         /*
6850          * We may have moved things, in which case we want to exit so we don't
6851          * write those changes out.  Once we have proper abort functionality in
6852          * progs this can be changed to something nicer.
6853          */
6854         BUG_ON(ret);
6855         return ret;
6856 }
6857
6858 /*
6859  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6860  * then just return -EIO.
6861  */
6862 static int try_to_fix_bad_block(struct btrfs_root *root,
6863                                 struct extent_buffer *buf,
6864                                 enum btrfs_tree_block_status status)
6865 {
6866         struct btrfs_trans_handle *trans;
6867         struct ulist *roots;
6868         struct ulist_node *node;
6869         struct btrfs_root *search_root;
6870         struct btrfs_path path;
6871         struct ulist_iterator iter;
6872         struct btrfs_key root_key, key;
6873         int ret;
6874
6875         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6876             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6877                 return -EIO;
6878
6879         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6880         if (ret)
6881                 return -EIO;
6882
6883         btrfs_init_path(&path);
6884         ULIST_ITER_INIT(&iter);
6885         while ((node = ulist_next(roots, &iter))) {
6886                 root_key.objectid = node->val;
6887                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6888                 root_key.offset = (u64)-1;
6889
6890                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6891                 if (IS_ERR(root)) {
6892                         ret = -EIO;
6893                         break;
6894                 }
6895
6896
6897                 trans = btrfs_start_transaction(search_root, 0);
6898                 if (IS_ERR(trans)) {
6899                         ret = PTR_ERR(trans);
6900                         break;
6901                 }
6902
6903                 path.lowest_level = btrfs_header_level(buf);
6904                 path.skip_check_block = 1;
6905                 if (path.lowest_level)
6906                         btrfs_node_key_to_cpu(buf, &key, 0);
6907                 else
6908                         btrfs_item_key_to_cpu(buf, &key, 0);
6909                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6910                 if (ret) {
6911                         ret = -EIO;
6912                         btrfs_commit_transaction(trans, search_root);
6913                         break;
6914                 }
6915                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6916                         ret = fix_key_order(search_root, &path);
6917                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6918                         ret = fix_item_offset(search_root, &path);
6919                 if (ret) {
6920                         btrfs_commit_transaction(trans, search_root);
6921                         break;
6922                 }
6923                 btrfs_release_path(&path);
6924                 btrfs_commit_transaction(trans, search_root);
6925         }
6926         ulist_free(roots);
6927         btrfs_release_path(&path);
6928         return ret;
6929 }
6930
6931 static int check_block(struct btrfs_root *root,
6932                        struct cache_tree *extent_cache,
6933                        struct extent_buffer *buf, u64 flags)
6934 {
6935         struct extent_record *rec;
6936         struct cache_extent *cache;
6937         struct btrfs_key key;
6938         enum btrfs_tree_block_status status;
6939         int ret = 0;
6940         int level;
6941
6942         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6943         if (!cache)
6944                 return 1;
6945         rec = container_of(cache, struct extent_record, cache);
6946         rec->generation = btrfs_header_generation(buf);
6947
6948         level = btrfs_header_level(buf);
6949         if (btrfs_header_nritems(buf) > 0) {
6950
6951                 if (level == 0)
6952                         btrfs_item_key_to_cpu(buf, &key, 0);
6953                 else
6954                         btrfs_node_key_to_cpu(buf, &key, 0);
6955
6956                 rec->info_objectid = key.objectid;
6957         }
6958         rec->info_level = level;
6959
6960         if (btrfs_is_leaf(buf))
6961                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6962         else
6963                 status = btrfs_check_node(root, &rec->parent_key, buf);
6964
6965         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6966                 if (repair)
6967                         status = try_to_fix_bad_block(root, buf, status);
6968                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6969                         ret = -EIO;
6970                         fprintf(stderr, "bad block %llu\n",
6971                                 (unsigned long long)buf->start);
6972                 } else {
6973                         /*
6974                          * Signal to callers we need to start the scan over
6975                          * again since we'll have cowed blocks.
6976                          */
6977                         ret = -EAGAIN;
6978                 }
6979         } else {
6980                 rec->content_checked = 1;
6981                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6982                         rec->owner_ref_checked = 1;
6983                 else {
6984                         ret = check_owner_ref(root, rec, buf);
6985                         if (!ret)
6986                                 rec->owner_ref_checked = 1;
6987                 }
6988         }
6989         if (!ret)
6990                 maybe_free_extent_rec(extent_cache, rec);
6991         return ret;
6992 }
6993
#if 0
/*
 * Compiled out by the surrounding "#if 0": list-based lookup of a tree
 * backref belonging to @rec.
 *
 * Walks @rec->backrefs and ignores data backrefs.  When @parent is non-zero
 * only full backrefs are considered and the match is on back->parent;
 * otherwise only non-full backrefs are considered and the match is on
 * back->root.
 *
 * Returns the first matching tree_backref, or NULL if there is none.
 *
 * NOTE(review): add_tree_backref() calls find_tree_backref(), so a live
 * definition presumably exists elsewhere in the file -- confirm before
 * deleting this copy.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct tree_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                /* Advance first so every "continue" below moves on. */
                cur = cur->next;
                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                if (parent > 0) {
                        /* Looking for a full backref keyed by parent. */
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        /* Looking for a non-full backref keyed by root. */
                        if (node->full_backref)
                                continue;
                        if (back->root == root)
                                return back;
                }
        }
        return NULL;
}
#endif
7023
7024 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7025                                                 u64 parent, u64 root)
7026 {
7027         struct tree_backref *ref = malloc(sizeof(*ref));
7028
7029         if (!ref)
7030                 return NULL;
7031         memset(&ref->node, 0, sizeof(ref->node));
7032         if (parent > 0) {
7033                 ref->parent = parent;
7034                 ref->node.full_backref = 1;
7035         } else {
7036                 ref->root = root;
7037                 ref->node.full_backref = 0;
7038         }
7039
7040         return ref;
7041 }
7042
#if 0
/*
 * Compiled out by the surrounding "#if 0": list-based lookup of a data
 * backref belonging to @rec.
 *
 * Walks @rec->backrefs and ignores non-data backrefs.  When @parent is
 * non-zero only full backrefs are considered and the match is on
 * back->parent.  Otherwise only non-full backrefs are considered and the
 * match is on (root, owner, offset); in that case, if the caller passes
 * @found_ref and the candidate has already been found, a candidate whose
 * bytes or disk_bytenr differ from @bytes / @disk_bytenr is skipped.
 *
 * Returns the first matching data_backref, or NULL if there is none.
 *
 * NOTE(review): add_data_backref() calls find_data_backref(), so a live
 * definition presumably exists elsewhere in the file -- confirm before
 * deleting this copy.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct data_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                /* Advance first so every "continue" below moves on. */
                cur = cur->next;
                if (!node->is_data)
                        continue;
                back = to_data_backref(node);
                if (parent > 0) {
                        /* Looking for a full backref keyed by parent. */
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        /* Looking for a non-full backref keyed by owner. */
                        if (node->full_backref)
                                continue;
                        if (back->root == root && back->owner == owner &&
                            back->offset == offset) {
                                /*
                                 * Same key but a different extent range that
                                 * was already found: not the same reference.
                                 */
                                if (found_ref && node->found_ref &&
                                    (back->bytes != bytes ||
                                    back->disk_bytenr != disk_bytenr))
                                        continue;
                                return back;
                        }
                }
        }
        return NULL;
}
#endif
7081
7082 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7083                                                 u64 parent, u64 root,
7084                                                 u64 owner, u64 offset,
7085                                                 u64 max_size)
7086 {
7087         struct data_backref *ref = malloc(sizeof(*ref));
7088
7089         if (!ref)
7090                 return NULL;
7091         memset(&ref->node, 0, sizeof(ref->node));
7092         ref->node.is_data = 1;
7093
7094         if (parent > 0) {
7095                 ref->parent = parent;
7096                 ref->owner = 0;
7097                 ref->offset = 0;
7098                 ref->node.full_backref = 1;
7099         } else {
7100                 ref->root = root;
7101                 ref->owner = owner;
7102                 ref->offset = offset;
7103                 ref->node.full_backref = 0;
7104         }
7105         ref->bytes = max_size;
7106         ref->found_ref = 0;
7107         ref->num_refs = 0;
7108         if (max_size > rec->max_size)
7109                 rec->max_size = max_size;
7110         return ref;
7111 }
7112
7113 /* Check if the type of extent matches with its chunk */
7114 static void check_extent_type(struct extent_record *rec)
7115 {
7116         struct btrfs_block_group_cache *bg_cache;
7117
7118         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7119         if (!bg_cache)
7120                 return;
7121
7122         /* data extent, check chunk directly*/
7123         if (!rec->metadata) {
7124                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7125                         rec->wrong_chunk_type = 1;
7126                 return;
7127         }
7128
7129         /* metadata extent, check the obvious case first */
7130         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7131                                  BTRFS_BLOCK_GROUP_METADATA))) {
7132                 rec->wrong_chunk_type = 1;
7133                 return;
7134         }
7135
7136         /*
7137          * Check SYSTEM extent, as it's also marked as metadata, we can only
7138          * make sure it's a SYSTEM extent by its backref
7139          */
7140         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7141                 struct extent_backref *node;
7142                 struct tree_backref *tback;
7143                 u64 bg_type;
7144
7145                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7146                 if (node->is_data) {
7147                         /* tree block shouldn't have data backref */
7148                         rec->wrong_chunk_type = 1;
7149                         return;
7150                 }
7151                 tback = container_of(node, struct tree_backref, node);
7152
7153                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7154                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7155                 else
7156                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7157                 if (!(bg_cache->flags & bg_type))
7158                         rec->wrong_chunk_type = 1;
7159         }
7160 }
7161
7162 /*
7163  * Allocate a new extent record, fill default values from @tmpl and insert int
7164  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7165  * the cache, otherwise it fails.
7166  */
7167 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7168                 struct extent_record *tmpl)
7169 {
7170         struct extent_record *rec;
7171         int ret = 0;
7172
7173         BUG_ON(tmpl->max_size == 0);
7174         rec = malloc(sizeof(*rec));
7175         if (!rec)
7176                 return -ENOMEM;
7177         rec->start = tmpl->start;
7178         rec->max_size = tmpl->max_size;
7179         rec->nr = max(tmpl->nr, tmpl->max_size);
7180         rec->found_rec = tmpl->found_rec;
7181         rec->content_checked = tmpl->content_checked;
7182         rec->owner_ref_checked = tmpl->owner_ref_checked;
7183         rec->num_duplicates = 0;
7184         rec->metadata = tmpl->metadata;
7185         rec->flag_block_full_backref = FLAG_UNSET;
7186         rec->bad_full_backref = 0;
7187         rec->crossing_stripes = 0;
7188         rec->wrong_chunk_type = 0;
7189         rec->is_root = tmpl->is_root;
7190         rec->refs = tmpl->refs;
7191         rec->extent_item_refs = tmpl->extent_item_refs;
7192         rec->parent_generation = tmpl->parent_generation;
7193         INIT_LIST_HEAD(&rec->backrefs);
7194         INIT_LIST_HEAD(&rec->dups);
7195         INIT_LIST_HEAD(&rec->list);
7196         rec->backref_tree = RB_ROOT;
7197         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7198         rec->cache.start = tmpl->start;
7199         rec->cache.size = tmpl->nr;
7200         ret = insert_cache_extent(extent_cache, &rec->cache);
7201         if (ret) {
7202                 free(rec);
7203                 return ret;
7204         }
7205         bytes_used += rec->nr;
7206
7207         if (tmpl->metadata)
7208                 rec->crossing_stripes = check_crossing_stripes(global_info,
7209                                 rec->start, global_info->nodesize);
7210         check_extent_type(rec);
7211         return ret;
7212 }
7213
/*
 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
 * some are hints:
 * - refs              - if found, increase refs
 * - is_root           - if found, set
 * - content_checked   - if found, set
 * - owner_ref_checked - if found, set
 *
 * If not found, create a new one, initialize and insert.
 *
 * Returns 0 when an existing record was updated, -ENOMEM when a duplicate
 * record could not be allocated, or the result of add_extent_rec_nolookup()
 * when no cached record covers [tmpl->start, tmpl->start + tmpl->nr).
 */
static int add_extent_rec(struct cache_tree *extent_cache,
                struct extent_record *tmpl)
{
        struct extent_record *rec;
        struct cache_extent *cache;
        int ret = 0;
        int dup = 0;

        cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
        if (cache) {
                rec = container_of(cache, struct extent_record, cache);
                /* Any non-zero tmpl->refs counts as exactly one more ref. */
                if (tmpl->refs)
                        rec->refs++;
                /*
                 * nr == 1 is what the backref helpers use for placeholder
                 * records; replace it with the size from the template.
                 */
                if (rec->nr == 1)
                        rec->nr = max(tmpl->nr, tmpl->max_size);

                /*
                 * We need to make sure to reset nr to whatever the extent
                 * record says was the real size, this way we can compare it to
                 * the backrefs.
                 */
                if (tmpl->found_rec) {
                        /*
                         * A second extent item for this record, or one that
                         * starts elsewhere inside it, is a duplicate.
                         */
                        if (tmpl->start != rec->start || rec->found_rec) {
                                struct extent_record *tmp;

                                dup = 1;
                                /* Queue the record once for later handling. */
                                if (list_empty(&rec->list))
                                        list_add_tail(&rec->list,
                                                      &duplicate_extents);

                                /*
                                 * We have to do this song and dance in case we
                                 * find an extent record that falls inside of
                                 * our current extent record but does not have
                                 * the same objectid.
                                 */
                                tmp = malloc(sizeof(*tmp));
                                if (!tmp)
                                        return -ENOMEM;
                                /* Only these fields of the duplicate are set. */
                                tmp->start = tmpl->start;
                                tmp->max_size = tmpl->max_size;
                                tmp->nr = tmpl->nr;
                                tmp->found_rec = 1;
                                tmp->metadata = tmpl->metadata;
                                tmp->extent_item_refs = tmpl->extent_item_refs;
                                INIT_LIST_HEAD(&tmp->list);
                                list_add_tail(&tmp->list, &rec->dups);
                                rec->num_duplicates++;
                        } else {
                                rec->nr = tmpl->nr;
                                rec->found_rec = 1;
                        }
                }

                /* Duplicates keep their own count in the dup entry above. */
                if (tmpl->extent_item_refs && !dup) {
                        /* Report, then overwrite, a previously stored count. */
                        if (rec->extent_item_refs) {
                                fprintf(stderr, "block %llu rec "
                                        "extent_item_refs %llu, passed %llu\n",
                                        (unsigned long long)tmpl->start,
                                        (unsigned long long)
                                                        rec->extent_item_refs,
                                        (unsigned long long)tmpl->extent_item_refs);
                        }
                        rec->extent_item_refs = tmpl->extent_item_refs;
                }
                /* The hint flags below are only ever set, never cleared. */
                if (tmpl->is_root)
                        rec->is_root = 1;
                if (tmpl->content_checked)
                        rec->content_checked = 1;
                if (tmpl->owner_ref_checked)
                        rec->owner_ref_checked = 1;
                /* parent_key is overwritten unconditionally. */
                memcpy(&rec->parent_key, &tmpl->parent_key,
                                sizeof(tmpl->parent_key));
                if (tmpl->parent_generation)
                        rec->parent_generation = tmpl->parent_generation;
                if (rec->max_size < tmpl->max_size)
                        rec->max_size = tmpl->max_size;

                /*
                 * A metadata extent can't cross stripe_len boundary, otherwise
                 * kernel scrub won't be able to handle it.
                 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
                 * it.
                 */
                if (tmpl->metadata)
                        rec->crossing_stripes = check_crossing_stripes(
                                        global_info, rec->start,
                                        global_info->nodesize);
                check_extent_type(rec);
                maybe_free_extent_rec(extent_cache, rec);
                return ret;
        }

        /* Nothing cached for this range yet: create a fresh record. */
        ret = add_extent_rec_nolookup(extent_cache, tmpl);

        return ret;
}
7321
7322 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7323                             u64 parent, u64 root, int found_ref)
7324 {
7325         struct extent_record *rec;
7326         struct tree_backref *back;
7327         struct cache_extent *cache;
7328         int ret;
7329         bool insert = false;
7330
7331         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7332         if (!cache) {
7333                 struct extent_record tmpl;
7334
7335                 memset(&tmpl, 0, sizeof(tmpl));
7336                 tmpl.start = bytenr;
7337                 tmpl.nr = 1;
7338                 tmpl.metadata = 1;
7339                 tmpl.max_size = 1;
7340
7341                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7342                 if (ret)
7343                         return ret;
7344
7345                 /* really a bug in cache_extent implement now */
7346                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7347                 if (!cache)
7348                         return -ENOENT;
7349         }
7350
7351         rec = container_of(cache, struct extent_record, cache);
7352         if (rec->start != bytenr) {
7353                 /*
7354                  * Several cause, from unaligned bytenr to over lapping extents
7355                  */
7356                 return -EEXIST;
7357         }
7358
7359         back = find_tree_backref(rec, parent, root);
7360         if (!back) {
7361                 back = alloc_tree_backref(rec, parent, root);
7362                 if (!back)
7363                         return -ENOMEM;
7364                 insert = true;
7365         }
7366
7367         if (found_ref) {
7368                 if (back->node.found_ref) {
7369                         fprintf(stderr, "Extent back ref already exists "
7370                                 "for %llu parent %llu root %llu \n",
7371                                 (unsigned long long)bytenr,
7372                                 (unsigned long long)parent,
7373                                 (unsigned long long)root);
7374                 }
7375                 back->node.found_ref = 1;
7376         } else {
7377                 if (back->node.found_extent_tree) {
7378                         fprintf(stderr, "Extent back ref already exists "
7379                                 "for %llu parent %llu root %llu \n",
7380                                 (unsigned long long)bytenr,
7381                                 (unsigned long long)parent,
7382                                 (unsigned long long)root);
7383                 }
7384                 back->node.found_extent_tree = 1;
7385         }
7386         if (insert)
7387                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7388                         compare_extent_backref));
7389         check_extent_type(rec);
7390         maybe_free_extent_rec(extent_cache, rec);
7391         return 0;
7392 }
7393
/*
 * Record a data backref for the extent at @bytenr.
 *
 * If no record covers @bytenr a placeholder record (nr = 1, max_size =
 * @max_size) is created first, and rec->max_size is raised to @max_size if
 * smaller.  The backref is then looked up and allocated when missing.
 *
 * With @found_ref set the reference was seen from a file extent: @num_refs
 * must be 1, the backref's bytes/disk_bytenr are updated to @max_size /
 * @bytenr, and both the backref's and the record's reference counts are
 * incremented.  Without @found_ref the reference comes from the extent tree:
 * @num_refs is stored and found_extent_tree is set.
 *
 * Returns 0 on success or the error from add_extent_rec_nolookup().  Calls
 * abort() if the freshly inserted record cannot be looked up again and
 * BUG_ON()s when the backref cannot be allocated.
 */
static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
                            u64 parent, u64 root, u64 owner, u64 offset,
                            u32 num_refs, int found_ref, u64 max_size)
{
        struct extent_record *rec;
        struct data_backref *back;
        struct cache_extent *cache;
        int ret;
        /* Set when @back still has to be (re)inserted into the rb-tree. */
        bool insert = false;

        cache = lookup_cache_extent(extent_cache, bytenr, 1);
        if (!cache) {
                struct extent_record tmpl;

                /* No record yet: create a one-byte placeholder. */
                memset(&tmpl, 0, sizeof(tmpl));
                tmpl.start = bytenr;
                tmpl.nr = 1;
                tmpl.max_size = max_size;

                ret = add_extent_rec_nolookup(extent_cache, &tmpl);
                if (ret)
                        return ret;

                /* The record was just inserted, so it must be found. */
                cache = lookup_cache_extent(extent_cache, bytenr, 1);
                if (!cache)
                        abort();
        }

        rec = container_of(cache, struct extent_record, cache);
        if (rec->max_size < max_size)
                rec->max_size = max_size;

        /*
         * If found_ref is set then max_size is the real size and must match the
         * existing refs.  So if we have already found a ref then we need to
         * make sure that this ref matches the existing one, otherwise we need
         * to add a new backref so we can notice that the backrefs don't match
         * and we need to figure out who is telling the truth.  This is to
         * account for that awful fsync bug I introduced where we'd end up with
         * a btrfs_file_extent_item that would have its length include multiple
         * prealloc extents or point inside of a prealloc extent.
         */
        back = find_data_backref(rec, parent, root, owner, offset, found_ref,
                                 bytenr, max_size);
        if (!back) {
                back = alloc_data_backref(rec, parent, root, owner, offset,
                                          max_size);
                BUG_ON(!back);
                insert = true;
        }

        if (found_ref) {
                /* A reference seen from a file extent counts exactly once. */
                BUG_ON(num_refs != 1);
                if (back->node.found_ref)
                        BUG_ON(back->bytes != max_size);
                back->node.found_ref = 1;
                back->found_ref += 1;
                if (back->bytes != max_size || back->disk_bytenr != bytenr) {
                        back->bytes = max_size;
                        back->disk_bytenr = bytenr;

                        /* Need to reinsert if not already in the tree */
                        if (!insert) {
                                rb_erase(&back->node.node, &rec->backref_tree);
                                insert = true;
                        }
                }
                rec->refs += 1;
                rec->content_checked = 1;
                rec->owner_ref_checked = 1;
        } else {
                /* Reference seen in the extent tree: warn on a repeat. */
                if (back->node.found_extent_tree) {
                        fprintf(stderr, "Extent back ref already exists "
                                "for %llu parent %llu root %llu "
                                "owner %llu offset %llu num_refs %lu\n",
                                (unsigned long long)bytenr,
                                (unsigned long long)parent,
                                (unsigned long long)root,
                                (unsigned long long)owner,
                                (unsigned long long)offset,
                                (unsigned long)num_refs);
                }
                back->num_refs = num_refs;
                back->node.found_extent_tree = 1;
        }
        if (insert)
                WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
                        compare_extent_backref));

        maybe_free_extent_rec(extent_cache, rec);
        return 0;
}
7486
7487 static int add_pending(struct cache_tree *pending,
7488                        struct cache_tree *seen, u64 bytenr, u32 size)
7489 {
7490         int ret;
7491         ret = add_cache_extent(seen, bytenr, size);
7492         if (ret)
7493                 return ret;
7494         add_cache_extent(pending, bytenr, size);
7495         return 0;
7496 }
7497
7498 static int pick_next_pending(struct cache_tree *pending,
7499                         struct cache_tree *reada,
7500                         struct cache_tree *nodes,
7501                         u64 last, struct block_info *bits, int bits_nr,
7502                         int *reada_bits)
7503 {
7504         unsigned long node_start = last;
7505         struct cache_extent *cache;
7506         int ret;
7507
7508         cache = search_cache_extent(reada, 0);
7509         if (cache) {
7510                 bits[0].start = cache->start;
7511                 bits[0].size = cache->size;
7512                 *reada_bits = 1;
7513                 return 1;
7514         }
7515         *reada_bits = 0;
7516         if (node_start > 32768)
7517                 node_start -= 32768;
7518
7519         cache = search_cache_extent(nodes, node_start);
7520         if (!cache)
7521                 cache = search_cache_extent(nodes, 0);
7522
7523         if (!cache) {
7524                  cache = search_cache_extent(pending, 0);
7525                  if (!cache)
7526                          return 0;
7527                  ret = 0;
7528                  do {
7529                          bits[ret].start = cache->start;
7530                          bits[ret].size = cache->size;
7531                          cache = next_cache_extent(cache);
7532                          ret++;
7533                  } while (cache && ret < bits_nr);
7534                  return ret;
7535         }
7536
7537         ret = 0;
7538         do {
7539                 bits[ret].start = cache->start;
7540                 bits[ret].size = cache->size;
7541                 cache = next_cache_extent(cache);
7542                 ret++;
7543         } while (cache && ret < bits_nr);
7544
7545         if (bits_nr - ret > 8) {
7546                 u64 lookup = bits[0].start + bits[0].size;
7547                 struct cache_extent *next;
7548                 next = search_cache_extent(pending, lookup);
7549                 while(next) {
7550                         if (next->start - lookup > 32768)
7551                                 break;
7552                         bits[ret].start = next->start;
7553                         bits[ret].size = next->size;
7554                         lookup = next->start + next->size;
7555                         ret++;
7556                         if (ret == bits_nr)
7557                                 break;
7558                         next = next_cache_extent(next);
7559                         if (!next)
7560                                 break;
7561                 }
7562         }
7563         return ret;
7564 }
7565
7566 static void free_chunk_record(struct cache_extent *cache)
7567 {
7568         struct chunk_record *rec;
7569
7570         rec = container_of(cache, struct chunk_record, cache);
7571         list_del_init(&rec->list);
7572         list_del_init(&rec->dextents);
7573         free(rec);
7574 }
7575
/*
 * Release every entry of @chunk_cache, using free_chunk_record() as the
 * per-entry destructor passed to cache_tree_free_extents().
 */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
        cache_tree_free_extents(chunk_cache, free_chunk_record);
}
7580
7581 static void free_device_record(struct rb_node *node)
7582 {
7583         struct device_record *rec;
7584
7585         rec = container_of(node, struct device_record, node);
7586         free(rec);
7587 }
7588
/*
 * Instantiate FREE_RB_BASED_TREE for the device cache with
 * free_device_record() as the per-node destructor.
 * NOTE(review): presumably this expands to the function that frees the whole
 * device rb-tree -- confirm against the macro definition.
 */
FREE_RB_BASED_TREE(device_cache, free_device_record);
7590
7591 int insert_block_group_record(struct block_group_tree *tree,
7592                               struct block_group_record *bg_rec)
7593 {
7594         int ret;
7595
7596         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7597         if (ret)
7598                 return ret;
7599
7600         list_add_tail(&bg_rec->list, &tree->block_groups);
7601         return 0;
7602 }
7603
7604 static void free_block_group_record(struct cache_extent *cache)
7605 {
7606         struct block_group_record *rec;
7607
7608         rec = container_of(cache, struct block_group_record, cache);
7609         list_del_init(&rec->list);
7610         free(rec);
7611 }
7612
/*
 * Release every record held in @tree->tree, using
 * free_block_group_record() as the per-entry destructor passed to
 * cache_tree_free_extents().
 */
void free_block_group_tree(struct block_group_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_block_group_record);
}
7617
7618 int insert_device_extent_record(struct device_extent_tree *tree,
7619                                 struct device_extent_record *de_rec)
7620 {
7621         int ret;
7622
7623         /*
7624          * Device extent is a bit different from the other extents, because
7625          * the extents which belong to the different devices may have the
7626          * same start and size, so we need use the special extent cache
7627          * search/insert functions.
7628          */
7629         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7630         if (ret)
7631                 return ret;
7632
7633         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7634         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7635         return 0;
7636 }
7637
7638 static void free_device_extent_record(struct cache_extent *cache)
7639 {
7640         struct device_extent_record *rec;
7641
7642         rec = container_of(cache, struct device_extent_record, cache);
7643         if (!list_empty(&rec->chunk_list))
7644                 list_del_init(&rec->chunk_list);
7645         if (!list_empty(&rec->device_list))
7646                 list_del_init(&rec->device_list);
7647         free(rec);
7648 }
7649
/*
 * Release every record held in @tree->tree, using
 * free_device_extent_record() as the per-entry destructor passed to
 * cache_tree_free_extents().
 */
void free_device_extent_tree(struct device_extent_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
7654
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item found at @slot of @leaf.
 *
 * The item key supplies the extent byte number (objectid) and the parent
 * (offset).  A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is
 * recorded as a tree backref, anything else as a data backref carrying the
 * ref count stored in the item.
 *
 * Returns whatever add_tree_backref() / add_data_backref() returns.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
                return add_tree_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0);

        return add_data_backref(extent_cache, key.objectid, key.offset,
                                0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
7675
7676 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7677                                             struct btrfs_key *key,
7678                                             int slot)
7679 {
7680         struct btrfs_chunk *ptr;
7681         struct chunk_record *rec;
7682         int num_stripes, i;
7683
7684         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7685         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7686
7687         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7688         if (!rec) {
7689                 fprintf(stderr, "memory allocation failed\n");
7690                 exit(-1);
7691         }
7692
7693         INIT_LIST_HEAD(&rec->list);
7694         INIT_LIST_HEAD(&rec->dextents);
7695         rec->bg_rec = NULL;
7696
7697         rec->cache.start = key->offset;
7698         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7699
7700         rec->generation = btrfs_header_generation(leaf);
7701
7702         rec->objectid = key->objectid;
7703         rec->type = key->type;
7704         rec->offset = key->offset;
7705
7706         rec->length = rec->cache.size;
7707         rec->owner = btrfs_chunk_owner(leaf, ptr);
7708         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7709         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7710         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7711         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7712         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7713         rec->num_stripes = num_stripes;
7714         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7715
7716         for (i = 0; i < rec->num_stripes; ++i) {
7717                 rec->stripes[i].devid =
7718                         btrfs_stripe_devid_nr(leaf, ptr, i);
7719                 rec->stripes[i].offset =
7720                         btrfs_stripe_offset_nr(leaf, ptr, i);
7721                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7722                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7723                                 BTRFS_UUID_SIZE);
7724         }
7725
7726         return rec;
7727 }
7728
7729 static int process_chunk_item(struct cache_tree *chunk_cache,
7730                               struct btrfs_key *key, struct extent_buffer *eb,
7731                               int slot)
7732 {
7733         struct chunk_record *rec;
7734         struct btrfs_chunk *chunk;
7735         int ret = 0;
7736
7737         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7738         /*
7739          * Do extra check for this chunk item,
7740          *
7741          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7742          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7743          * and owner<->key_type check.
7744          */
7745         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7746                                       key->offset);
7747         if (ret < 0) {
7748                 error("chunk(%llu, %llu) is not valid, ignore it",
7749                       key->offset, btrfs_chunk_length(eb, chunk));
7750                 return 0;
7751         }
7752         rec = btrfs_new_chunk_record(eb, key, slot);
7753         ret = insert_cache_extent(chunk_cache, &rec->cache);
7754         if (ret) {
7755                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7756                         rec->offset, rec->length);
7757                 free(rec);
7758         }
7759
7760         return ret;
7761 }
7762
7763 static int process_device_item(struct rb_root *dev_cache,
7764                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7765 {
7766         struct btrfs_dev_item *ptr;
7767         struct device_record *rec;
7768         int ret = 0;
7769
7770         ptr = btrfs_item_ptr(eb,
7771                 slot, struct btrfs_dev_item);
7772
7773         rec = malloc(sizeof(*rec));
7774         if (!rec) {
7775                 fprintf(stderr, "memory allocation failed\n");
7776                 return -ENOMEM;
7777         }
7778
7779         rec->devid = key->offset;
7780         rec->generation = btrfs_header_generation(eb);
7781
7782         rec->objectid = key->objectid;
7783         rec->type = key->type;
7784         rec->offset = key->offset;
7785
7786         rec->devid = btrfs_device_id(eb, ptr);
7787         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7788         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7789
7790         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7791         if (ret) {
7792                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7793                 free(rec);
7794         }
7795
7796         return ret;
7797 }
7798
7799 struct block_group_record *
7800 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7801                              int slot)
7802 {
7803         struct btrfs_block_group_item *ptr;
7804         struct block_group_record *rec;
7805
7806         rec = calloc(1, sizeof(*rec));
7807         if (!rec) {
7808                 fprintf(stderr, "memory allocation failed\n");
7809                 exit(-1);
7810         }
7811
7812         rec->cache.start = key->objectid;
7813         rec->cache.size = key->offset;
7814
7815         rec->generation = btrfs_header_generation(leaf);
7816
7817         rec->objectid = key->objectid;
7818         rec->type = key->type;
7819         rec->offset = key->offset;
7820
7821         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7822         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7823
7824         INIT_LIST_HEAD(&rec->list);
7825
7826         return rec;
7827 }
7828
7829 static int process_block_group_item(struct block_group_tree *block_group_cache,
7830                                     struct btrfs_key *key,
7831                                     struct extent_buffer *eb, int slot)
7832 {
7833         struct block_group_record *rec;
7834         int ret = 0;
7835
7836         rec = btrfs_new_block_group_record(eb, key, slot);
7837         ret = insert_block_group_record(block_group_cache, rec);
7838         if (ret) {
7839                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7840                         rec->objectid, rec->offset);
7841                 free(rec);
7842         }
7843
7844         return ret;
7845 }
7846
7847 struct device_extent_record *
7848 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7849                                struct btrfs_key *key, int slot)
7850 {
7851         struct device_extent_record *rec;
7852         struct btrfs_dev_extent *ptr;
7853
7854         rec = calloc(1, sizeof(*rec));
7855         if (!rec) {
7856                 fprintf(stderr, "memory allocation failed\n");
7857                 exit(-1);
7858         }
7859
7860         rec->cache.objectid = key->objectid;
7861         rec->cache.start = key->offset;
7862
7863         rec->generation = btrfs_header_generation(leaf);
7864
7865         rec->objectid = key->objectid;
7866         rec->type = key->type;
7867         rec->offset = key->offset;
7868
7869         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7870         rec->chunk_objecteid =
7871                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7872         rec->chunk_offset =
7873                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7874         rec->length = btrfs_dev_extent_length(leaf, ptr);
7875         rec->cache.size = rec->length;
7876
7877         INIT_LIST_HEAD(&rec->chunk_list);
7878         INIT_LIST_HEAD(&rec->device_list);
7879
7880         return rec;
7881 }
7882
7883 static int
7884 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7885                            struct btrfs_key *key, struct extent_buffer *eb,
7886                            int slot)
7887 {
7888         struct device_extent_record *rec;
7889         int ret;
7890
7891         rec = btrfs_new_device_extent_record(eb, key, slot);
7892         ret = insert_device_extent_record(dev_extent_cache, rec);
7893         if (ret) {
7894                 fprintf(stderr,
7895                         "Device extent[%llu, %llu, %llu] existed.\n",
7896                         rec->objectid, rec->offset, rec->length);
7897                 free(rec);
7898         }
7899
7900         return ret;
7901 }
7902
7903 static int process_extent_item(struct btrfs_root *root,
7904                                struct cache_tree *extent_cache,
7905                                struct extent_buffer *eb, int slot)
7906 {
7907         struct btrfs_extent_item *ei;
7908         struct btrfs_extent_inline_ref *iref;
7909         struct btrfs_extent_data_ref *dref;
7910         struct btrfs_shared_data_ref *sref;
7911         struct btrfs_key key;
7912         struct extent_record tmpl;
7913         unsigned long end;
7914         unsigned long ptr;
7915         int ret;
7916         int type;
7917         u32 item_size = btrfs_item_size_nr(eb, slot);
7918         u64 refs = 0;
7919         u64 offset;
7920         u64 num_bytes;
7921         int metadata = 0;
7922
7923         btrfs_item_key_to_cpu(eb, &key, slot);
7924
7925         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7926                 metadata = 1;
7927                 num_bytes = root->fs_info->nodesize;
7928         } else {
7929                 num_bytes = key.offset;
7930         }
7931
7932         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7933                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7934                       key.objectid, root->fs_info->sectorsize);
7935                 return -EIO;
7936         }
7937         if (item_size < sizeof(*ei)) {
7938 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7939                 struct btrfs_extent_item_v0 *ei0;
7940                 BUG_ON(item_size != sizeof(*ei0));
7941                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7942                 refs = btrfs_extent_refs_v0(eb, ei0);
7943 #else
7944                 BUG();
7945 #endif
7946                 memset(&tmpl, 0, sizeof(tmpl));
7947                 tmpl.start = key.objectid;
7948                 tmpl.nr = num_bytes;
7949                 tmpl.extent_item_refs = refs;
7950                 tmpl.metadata = metadata;
7951                 tmpl.found_rec = 1;
7952                 tmpl.max_size = num_bytes;
7953
7954                 return add_extent_rec(extent_cache, &tmpl);
7955         }
7956
7957         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7958         refs = btrfs_extent_refs(eb, ei);
7959         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7960                 metadata = 1;
7961         else
7962                 metadata = 0;
7963         if (metadata && num_bytes != root->fs_info->nodesize) {
7964                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7965                       num_bytes, root->fs_info->nodesize);
7966                 return -EIO;
7967         }
7968         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7969                 error("ignore invalid data extent, length %llu is not aligned to %u",
7970                       num_bytes, root->fs_info->sectorsize);
7971                 return -EIO;
7972         }
7973
7974         memset(&tmpl, 0, sizeof(tmpl));
7975         tmpl.start = key.objectid;
7976         tmpl.nr = num_bytes;
7977         tmpl.extent_item_refs = refs;
7978         tmpl.metadata = metadata;
7979         tmpl.found_rec = 1;
7980         tmpl.max_size = num_bytes;
7981         add_extent_rec(extent_cache, &tmpl);
7982
7983         ptr = (unsigned long)(ei + 1);
7984         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7985             key.type == BTRFS_EXTENT_ITEM_KEY)
7986                 ptr += sizeof(struct btrfs_tree_block_info);
7987
7988         end = (unsigned long)ei + item_size;
7989         while (ptr < end) {
7990                 iref = (struct btrfs_extent_inline_ref *)ptr;
7991                 type = btrfs_extent_inline_ref_type(eb, iref);
7992                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7993                 switch (type) {
7994                 case BTRFS_TREE_BLOCK_REF_KEY:
7995                         ret = add_tree_backref(extent_cache, key.objectid,
7996                                         0, offset, 0);
7997                         if (ret < 0)
7998                                 error(
7999                         "add_tree_backref failed (extent items tree block): %s",
8000                                       strerror(-ret));
8001                         break;
8002                 case BTRFS_SHARED_BLOCK_REF_KEY:
8003                         ret = add_tree_backref(extent_cache, key.objectid,
8004                                         offset, 0, 0);
8005                         if (ret < 0)
8006                                 error(
8007                         "add_tree_backref failed (extent items shared block): %s",
8008                                       strerror(-ret));
8009                         break;
8010                 case BTRFS_EXTENT_DATA_REF_KEY:
8011                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8012                         add_data_backref(extent_cache, key.objectid, 0,
8013                                         btrfs_extent_data_ref_root(eb, dref),
8014                                         btrfs_extent_data_ref_objectid(eb,
8015                                                                        dref),
8016                                         btrfs_extent_data_ref_offset(eb, dref),
8017                                         btrfs_extent_data_ref_count(eb, dref),
8018                                         0, num_bytes);
8019                         break;
8020                 case BTRFS_SHARED_DATA_REF_KEY:
8021                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
8022                         add_data_backref(extent_cache, key.objectid, offset,
8023                                         0, 0, 0,
8024                                         btrfs_shared_data_ref_count(eb, sref),
8025                                         0, num_bytes);
8026                         break;
8027                 default:
8028                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8029                                 key.objectid, key.type, num_bytes);
8030                         goto out;
8031                 }
8032                 ptr += btrfs_extent_inline_ref_size(type);
8033         }
8034         WARN_ON(ptr > end);
8035 out:
8036         return 0;
8037 }
8038
8039 static int check_cache_range(struct btrfs_root *root,
8040                              struct btrfs_block_group_cache *cache,
8041                              u64 offset, u64 bytes)
8042 {
8043         struct btrfs_free_space *entry;
8044         u64 *logical;
8045         u64 bytenr;
8046         int stripe_len;
8047         int i, nr, ret;
8048
8049         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8050                 bytenr = btrfs_sb_offset(i);
8051                 ret = btrfs_rmap_block(root->fs_info,
8052                                        cache->key.objectid, bytenr, 0,
8053                                        &logical, &nr, &stripe_len);
8054                 if (ret)
8055                         return ret;
8056
8057                 while (nr--) {
8058                         if (logical[nr] + stripe_len <= offset)
8059                                 continue;
8060                         if (offset + bytes <= logical[nr])
8061                                 continue;
8062                         if (logical[nr] == offset) {
8063                                 if (stripe_len >= bytes) {
8064                                         free(logical);
8065                                         return 0;
8066                                 }
8067                                 bytes -= stripe_len;
8068                                 offset += stripe_len;
8069                         } else if (logical[nr] < offset) {
8070                                 if (logical[nr] + stripe_len >=
8071                                     offset + bytes) {
8072                                         free(logical);
8073                                         return 0;
8074                                 }
8075                                 bytes = (offset + bytes) -
8076                                         (logical[nr] + stripe_len);
8077                                 offset = logical[nr] + stripe_len;
8078                         } else {
8079                                 /*
8080                                  * Could be tricky, the super may land in the
8081                                  * middle of the area we're checking.  First
8082                                  * check the easiest case, it's at the end.
8083                                  */
8084                                 if (logical[nr] + stripe_len >=
8085                                     bytes + offset) {
8086                                         bytes = logical[nr] - offset;
8087                                         continue;
8088                                 }
8089
8090                                 /* Check the left side */
8091                                 ret = check_cache_range(root, cache,
8092                                                         offset,
8093                                                         logical[nr] - offset);
8094                                 if (ret) {
8095                                         free(logical);
8096                                         return ret;
8097                                 }
8098
8099                                 /* Now we continue with the right side */
8100                                 bytes = (offset + bytes) -
8101                                         (logical[nr] + stripe_len);
8102                                 offset = logical[nr] + stripe_len;
8103                         }
8104                 }
8105
8106                 free(logical);
8107         }
8108
8109         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8110         if (!entry) {
8111                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8112                         offset, offset+bytes);
8113                 return -EINVAL;
8114         }
8115
8116         if (entry->offset != offset) {
8117                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8118                         entry->offset);
8119                 return -EINVAL;
8120         }
8121
8122         if (entry->bytes != bytes) {
8123                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8124                         bytes, entry->bytes, offset);
8125                 return -EINVAL;
8126         }
8127
8128         unlink_free_space(cache->free_space_ctl, entry);
8129         free(entry);
8130         return 0;
8131 }
8132
8133 static int verify_space_cache(struct btrfs_root *root,
8134                               struct btrfs_block_group_cache *cache)
8135 {
8136         struct btrfs_path path;
8137         struct extent_buffer *leaf;
8138         struct btrfs_key key;
8139         u64 last;
8140         int ret = 0;
8141
8142         root = root->fs_info->extent_root;
8143
8144         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8145
8146         btrfs_init_path(&path);
8147         key.objectid = last;
8148         key.offset = 0;
8149         key.type = BTRFS_EXTENT_ITEM_KEY;
8150         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8151         if (ret < 0)
8152                 goto out;
8153         ret = 0;
8154         while (1) {
8155                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8156                         ret = btrfs_next_leaf(root, &path);
8157                         if (ret < 0)
8158                                 goto out;
8159                         if (ret > 0) {
8160                                 ret = 0;
8161                                 break;
8162                         }
8163                 }
8164                 leaf = path.nodes[0];
8165                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8166                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8167                         break;
8168                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8169                     key.type != BTRFS_METADATA_ITEM_KEY) {
8170                         path.slots[0]++;
8171                         continue;
8172                 }
8173
8174                 if (last == key.objectid) {
8175                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8176                                 last = key.objectid + key.offset;
8177                         else
8178                                 last = key.objectid + root->fs_info->nodesize;
8179                         path.slots[0]++;
8180                         continue;
8181                 }
8182
8183                 ret = check_cache_range(root, cache, last,
8184                                         key.objectid - last);
8185                 if (ret)
8186                         break;
8187                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8188                         last = key.objectid + key.offset;
8189                 else
8190                         last = key.objectid + root->fs_info->nodesize;
8191                 path.slots[0]++;
8192         }
8193
8194         if (last < cache->key.objectid + cache->key.offset)
8195                 ret = check_cache_range(root, cache, last,
8196                                         cache->key.objectid +
8197                                         cache->key.offset - last);
8198
8199 out:
8200         btrfs_release_path(&path);
8201
8202         if (!ret &&
8203             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8204                 fprintf(stderr, "There are still entries left in the space "
8205                         "cache\n");
8206                 ret = -EINVAL;
8207         }
8208
8209         return ret;
8210 }
8211
8212 static int check_space_cache(struct btrfs_root *root)
8213 {
8214         struct btrfs_block_group_cache *cache;
8215         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8216         int ret;
8217         int error = 0;
8218
8219         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8220             btrfs_super_generation(root->fs_info->super_copy) !=
8221             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8222                 printf("cache and super generation don't match, space cache "
8223                        "will be invalidated\n");
8224                 return 0;
8225         }
8226
8227         if (ctx.progress_enabled) {
8228                 ctx.tp = TASK_FREE_SPACE;
8229                 task_start(ctx.info);
8230         }
8231
8232         while (1) {
8233                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8234                 if (!cache)
8235                         break;
8236
8237                 start = cache->key.objectid + cache->key.offset;
8238                 if (!cache->free_space_ctl) {
8239                         if (btrfs_init_free_space_ctl(cache,
8240                                                 root->fs_info->sectorsize)) {
8241                                 ret = -ENOMEM;
8242                                 break;
8243                         }
8244                 } else {
8245                         btrfs_remove_free_space_cache(cache);
8246                 }
8247
8248                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8249                         ret = exclude_super_stripes(root, cache);
8250                         if (ret) {
8251                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8252                                         strerror(-ret));
8253                                 error++;
8254                                 continue;
8255                         }
8256                         ret = load_free_space_tree(root->fs_info, cache);
8257                         free_excluded_extents(root, cache);
8258                         if (ret < 0) {
8259                                 fprintf(stderr, "could not load free space tree: %s\n",
8260                                         strerror(-ret));
8261                                 error++;
8262                                 continue;
8263                         }
8264                         error += ret;
8265                 } else {
8266                         ret = load_free_space_cache(root->fs_info, cache);
8267                         if (!ret)
8268                                 continue;
8269                 }
8270
8271                 ret = verify_space_cache(root, cache);
8272                 if (ret) {
8273                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8274                                 cache->key.objectid);
8275                         error++;
8276                 }
8277         }
8278
8279         task_stop(ctx.info);
8280
8281         return error ? -EINVAL : 0;
8282 }
8283
8284 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8285                         u64 num_bytes, unsigned long leaf_offset,
8286                         struct extent_buffer *eb) {
8287
8288         struct btrfs_fs_info *fs_info = root->fs_info;
8289         u64 offset = 0;
8290         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8291         char *data;
8292         unsigned long csum_offset;
8293         u32 csum;
8294         u32 csum_expected;
8295         u64 read_len;
8296         u64 data_checked = 0;
8297         u64 tmp;
8298         int ret = 0;
8299         int mirror;
8300         int num_copies;
8301
8302         if (num_bytes % fs_info->sectorsize)
8303                 return -EINVAL;
8304
8305         data = malloc(num_bytes);
8306         if (!data)
8307                 return -ENOMEM;
8308
8309         while (offset < num_bytes) {
8310                 mirror = 0;
8311 again:
8312                 read_len = num_bytes - offset;
8313                 /* read as much space once a time */
8314                 ret = read_extent_data(fs_info, data + offset,
8315                                 bytenr + offset, &read_len, mirror);
8316                 if (ret)
8317                         goto out;
8318                 data_checked = 0;
8319                 /* verify every 4k data's checksum */
8320                 while (data_checked < read_len) {
8321                         csum = ~(u32)0;
8322                         tmp = offset + data_checked;
8323
8324                         csum = btrfs_csum_data((char *)data + tmp,
8325                                                csum, fs_info->sectorsize);
8326                         btrfs_csum_final(csum, (u8 *)&csum);
8327
8328                         csum_offset = leaf_offset +
8329                                  tmp / fs_info->sectorsize * csum_size;
8330                         read_extent_buffer(eb, (char *)&csum_expected,
8331                                            csum_offset, csum_size);
8332                         /* try another mirror */
8333                         if (csum != csum_expected) {
8334                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8335                                                 mirror, bytenr + tmp,
8336                                                 csum, csum_expected);
8337                                 num_copies = btrfs_num_copies(root->fs_info,
8338                                                 bytenr, num_bytes);
8339                                 if (mirror < num_copies - 1) {
8340                                         mirror += 1;
8341                                         goto again;
8342                                 }
8343                         }
8344                         data_checked += fs_info->sectorsize;
8345                 }
8346                 offset += read_len;
8347         }
8348 out:
8349         free(data);
8350         return ret;
8351 }
8352
8353 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8354                                u64 num_bytes)
8355 {
8356         struct btrfs_path path;
8357         struct extent_buffer *leaf;
8358         struct btrfs_key key;
8359         int ret;
8360
8361         btrfs_init_path(&path);
8362         key.objectid = bytenr;
8363         key.type = BTRFS_EXTENT_ITEM_KEY;
8364         key.offset = (u64)-1;
8365
8366 again:
8367         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8368                                 0, 0);
8369         if (ret < 0) {
8370                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8371                 btrfs_release_path(&path);
8372                 return ret;
8373         } else if (ret) {
8374                 if (path.slots[0] > 0) {
8375                         path.slots[0]--;
8376                 } else {
8377                         ret = btrfs_prev_leaf(root, &path);
8378                         if (ret < 0) {
8379                                 goto out;
8380                         } else if (ret > 0) {
8381                                 ret = 0;
8382                                 goto out;
8383                         }
8384                 }
8385         }
8386
8387         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8388
8389         /*
8390          * Block group items come before extent items if they have the same
8391          * bytenr, so walk back one more just in case.  Dear future traveller,
8392          * first congrats on mastering time travel.  Now if it's not too much
8393          * trouble could you go back to 2006 and tell Chris to make the
8394          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8395          * EXTENT_ITEM_KEY please?
8396          */
8397         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8398                 if (path.slots[0] > 0) {
8399                         path.slots[0]--;
8400                 } else {
8401                         ret = btrfs_prev_leaf(root, &path);
8402                         if (ret < 0) {
8403                                 goto out;
8404                         } else if (ret > 0) {
8405                                 ret = 0;
8406                                 goto out;
8407                         }
8408                 }
8409                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8410         }
8411
8412         while (num_bytes) {
8413                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8414                         ret = btrfs_next_leaf(root, &path);
8415                         if (ret < 0) {
8416                                 fprintf(stderr, "Error going to next leaf "
8417                                         "%d\n", ret);
8418                                 btrfs_release_path(&path);
8419                                 return ret;
8420                         } else if (ret) {
8421                                 break;
8422                         }
8423                 }
8424                 leaf = path.nodes[0];
8425                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8426                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8427                         path.slots[0]++;
8428                         continue;
8429                 }
8430                 if (key.objectid + key.offset < bytenr) {
8431                         path.slots[0]++;
8432                         continue;
8433                 }
8434                 if (key.objectid > bytenr + num_bytes)
8435                         break;
8436
8437                 if (key.objectid == bytenr) {
8438                         if (key.offset >= num_bytes) {
8439                                 num_bytes = 0;
8440                                 break;
8441                         }
8442                         num_bytes -= key.offset;
8443                         bytenr += key.offset;
8444                 } else if (key.objectid < bytenr) {
8445                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8446                                 num_bytes = 0;
8447                                 break;
8448                         }
8449                         num_bytes = (bytenr + num_bytes) -
8450                                 (key.objectid + key.offset);
8451                         bytenr = key.objectid + key.offset;
8452                 } else {
8453                         if (key.objectid + key.offset < bytenr + num_bytes) {
8454                                 u64 new_start = key.objectid + key.offset;
8455                                 u64 new_bytes = bytenr + num_bytes - new_start;
8456
8457                                 /*
8458                                  * Weird case, the extent is in the middle of
8459                                  * our range, we'll have to search one side
8460                                  * and then the other.  Not sure if this happens
8461                                  * in real life, but no harm in coding it up
8462                                  * anyway just in case.
8463                                  */
8464                                 btrfs_release_path(&path);
8465                                 ret = check_extent_exists(root, new_start,
8466                                                           new_bytes);
8467                                 if (ret) {
8468                                         fprintf(stderr, "Right section didn't "
8469                                                 "have a record\n");
8470                                         break;
8471                                 }
8472                                 num_bytes = key.objectid - bytenr;
8473                                 goto again;
8474                         }
8475                         num_bytes = key.objectid - bytenr;
8476                 }
8477                 path.slots[0]++;
8478         }
8479         ret = 0;
8480
8481 out:
8482         if (num_bytes && !ret) {
8483                 fprintf(stderr, "There are no extents for csum range "
8484                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8485                 ret = 1;
8486         }
8487
8488         btrfs_release_path(&path);
8489         return ret;
8490 }
8491
8492 static int check_csums(struct btrfs_root *root)
8493 {
8494         struct btrfs_path path;
8495         struct extent_buffer *leaf;
8496         struct btrfs_key key;
8497         u64 offset = 0, num_bytes = 0;
8498         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8499         int errors = 0;
8500         int ret;
8501         u64 data_len;
8502         unsigned long leaf_offset;
8503
8504         root = root->fs_info->csum_root;
8505         if (!extent_buffer_uptodate(root->node)) {
8506                 fprintf(stderr, "No valid csum tree found\n");
8507                 return -ENOENT;
8508         }
8509
8510         btrfs_init_path(&path);
8511         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8512         key.type = BTRFS_EXTENT_CSUM_KEY;
8513         key.offset = 0;
8514         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8515         if (ret < 0) {
8516                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8517                 btrfs_release_path(&path);
8518                 return ret;
8519         }
8520
8521         if (ret > 0 && path.slots[0])
8522                 path.slots[0]--;
8523         ret = 0;
8524
8525         while (1) {
8526                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8527                         ret = btrfs_next_leaf(root, &path);
8528                         if (ret < 0) {
8529                                 fprintf(stderr, "Error going to next leaf "
8530                                         "%d\n", ret);
8531                                 break;
8532                         }
8533                         if (ret)
8534                                 break;
8535                 }
8536                 leaf = path.nodes[0];
8537
8538                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8539                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8540                         path.slots[0]++;
8541                         continue;
8542                 }
8543
8544                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8545                               csum_size) * root->fs_info->sectorsize;
8546                 if (!check_data_csum)
8547                         goto skip_csum_check;
8548                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8549                 ret = check_extent_csums(root, key.offset, data_len,
8550                                          leaf_offset, leaf);
8551                 if (ret)
8552                         break;
8553 skip_csum_check:
8554                 if (!num_bytes) {
8555                         offset = key.offset;
8556                 } else if (key.offset != offset + num_bytes) {
8557                         ret = check_extent_exists(root, offset, num_bytes);
8558                         if (ret) {
8559                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8560                                         "there is no extent record\n",
8561                                         offset, offset+num_bytes);
8562                                 errors++;
8563                         }
8564                         offset = key.offset;
8565                         num_bytes = 0;
8566                 }
8567                 num_bytes += data_len;
8568                 path.slots[0]++;
8569         }
8570
8571         btrfs_release_path(&path);
8572         return errors;
8573 }
8574
8575 static int is_dropped_key(struct btrfs_key *key,
8576                           struct btrfs_key *drop_key) {
8577         if (key->objectid < drop_key->objectid)
8578                 return 1;
8579         else if (key->objectid == drop_key->objectid) {
8580                 if (key->type < drop_key->type)
8581                         return 1;
8582                 else if (key->type == drop_key->type) {
8583                         if (key->offset < drop_key->offset)
8584                                 return 1;
8585                 }
8586         }
8587         return 0;
8588 }
8589
8590 /*
8591  * Here are the rules for FULL_BACKREF.
8592  *
8593  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8594  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8595  *      FULL_BACKREF set.
8596  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8597  *    if it happened after the relocation occurred since we'll have dropped the
8598  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8599  *    have no real way to know for sure.
8600  *
8601  * We process the blocks one root at a time, and we start from the lowest root
8602  * objectid and go to the highest.  So we can just lookup the owner backref for
8603  * the record and if we don't find it then we know it doesn't exist and we have
8604  * a FULL BACKREF.
8605  *
8606  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8607  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8608  * be set or not and then we can check later once we've gathered all the refs.
8609  */
8610 static int calc_extent_flag(struct cache_tree *extent_cache,
8611                            struct extent_buffer *buf,
8612                            struct root_item_record *ri,
8613                            u64 *flags)
8614 {
8615         struct extent_record *rec;
8616         struct cache_extent *cache;
8617         struct tree_backref *tback;
8618         u64 owner = 0;
8619
8620         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8621         /* we have added this extent before */
8622         if (!cache)
8623                 return -ENOENT;
8624
8625         rec = container_of(cache, struct extent_record, cache);
8626
8627         /*
8628          * Except file/reloc tree, we can not have
8629          * FULL BACKREF MODE
8630          */
8631         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8632                 goto normal;
8633         /*
8634          * root node
8635          */
8636         if (buf->start == ri->bytenr)
8637                 goto normal;
8638
8639         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8640                 goto full_backref;
8641
8642         owner = btrfs_header_owner(buf);
8643         if (owner == ri->objectid)
8644                 goto normal;
8645
8646         tback = find_tree_backref(rec, 0, owner);
8647         if (!tback)
8648                 goto full_backref;
8649 normal:
8650         *flags = 0;
8651         if (rec->flag_block_full_backref != FLAG_UNSET &&
8652             rec->flag_block_full_backref != 0)
8653                 rec->bad_full_backref = 1;
8654         return 0;
8655 full_backref:
8656         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8657         if (rec->flag_block_full_backref != FLAG_UNSET &&
8658             rec->flag_block_full_backref != 1)
8659                 rec->bad_full_backref = 1;
8660         return 0;
8661 }
8662
8663 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8664 {
8665         fprintf(stderr, "Invalid key type(");
8666         print_key_type(stderr, 0, key_type);
8667         fprintf(stderr, ") found in root(");
8668         print_objectid(stderr, rootid, 0);
8669         fprintf(stderr, ")\n");
8670 }
8671
8672 /*
8673  * Check if the key is valid with its extent buffer.
8674  *
8675  * This is a early check in case invalid key exists in a extent buffer
8676  * This is not comprehensive yet, but should prevent wrong key/item passed
8677  * further
8678  */
8679 static int check_type_with_root(u64 rootid, u8 key_type)
8680 {
8681         switch (key_type) {
8682         /* Only valid in chunk tree */
8683         case BTRFS_DEV_ITEM_KEY:
8684         case BTRFS_CHUNK_ITEM_KEY:
8685                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8686                         goto err;
8687                 break;
8688         /* valid in csum and log tree */
8689         case BTRFS_CSUM_TREE_OBJECTID:
8690                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8691                       is_fstree(rootid)))
8692                         goto err;
8693                 break;
8694         case BTRFS_EXTENT_ITEM_KEY:
8695         case BTRFS_METADATA_ITEM_KEY:
8696         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8697                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8698                         goto err;
8699                 break;
8700         case BTRFS_ROOT_ITEM_KEY:
8701                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8702                         goto err;
8703                 break;
8704         case BTRFS_DEV_EXTENT_KEY:
8705                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8706                         goto err;
8707                 break;
8708         }
8709         return 0;
8710 err:
8711         report_mismatch_key_root(key_type, rootid);
8712         return -EINVAL;
8713 }
8714
8715 static int run_next_block(struct btrfs_root *root,
8716                           struct block_info *bits,
8717                           int bits_nr,
8718                           u64 *last,
8719                           struct cache_tree *pending,
8720                           struct cache_tree *seen,
8721                           struct cache_tree *reada,
8722                           struct cache_tree *nodes,
8723                           struct cache_tree *extent_cache,
8724                           struct cache_tree *chunk_cache,
8725                           struct rb_root *dev_cache,
8726                           struct block_group_tree *block_group_cache,
8727                           struct device_extent_tree *dev_extent_cache,
8728                           struct root_item_record *ri)
8729 {
8730         struct btrfs_fs_info *fs_info = root->fs_info;
8731         struct extent_buffer *buf;
8732         struct extent_record *rec = NULL;
8733         u64 bytenr;
8734         u32 size;
8735         u64 parent;
8736         u64 owner;
8737         u64 flags;
8738         u64 ptr;
8739         u64 gen = 0;
8740         int ret = 0;
8741         int i;
8742         int nritems;
8743         struct btrfs_key key;
8744         struct cache_extent *cache;
8745         int reada_bits;
8746
8747         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8748                                     bits_nr, &reada_bits);
8749         if (nritems == 0)
8750                 return 1;
8751
8752         if (!reada_bits) {
8753                 for(i = 0; i < nritems; i++) {
8754                         ret = add_cache_extent(reada, bits[i].start,
8755                                                bits[i].size);
8756                         if (ret == -EEXIST)
8757                                 continue;
8758
8759                         /* fixme, get the parent transid */
8760                         readahead_tree_block(fs_info, bits[i].start, 0);
8761                 }
8762         }
8763         *last = bits[0].start;
8764         bytenr = bits[0].start;
8765         size = bits[0].size;
8766
8767         cache = lookup_cache_extent(pending, bytenr, size);
8768         if (cache) {
8769                 remove_cache_extent(pending, cache);
8770                 free(cache);
8771         }
8772         cache = lookup_cache_extent(reada, bytenr, size);
8773         if (cache) {
8774                 remove_cache_extent(reada, cache);
8775                 free(cache);
8776         }
8777         cache = lookup_cache_extent(nodes, bytenr, size);
8778         if (cache) {
8779                 remove_cache_extent(nodes, cache);
8780                 free(cache);
8781         }
8782         cache = lookup_cache_extent(extent_cache, bytenr, size);
8783         if (cache) {
8784                 rec = container_of(cache, struct extent_record, cache);
8785                 gen = rec->parent_generation;
8786         }
8787
8788         /* fixme, get the real parent transid */
8789         buf = read_tree_block(root->fs_info, bytenr, gen);
8790         if (!extent_buffer_uptodate(buf)) {
8791                 record_bad_block_io(root->fs_info,
8792                                     extent_cache, bytenr, size);
8793                 goto out;
8794         }
8795
8796         nritems = btrfs_header_nritems(buf);
8797
8798         flags = 0;
8799         if (!init_extent_tree) {
8800                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8801                                        btrfs_header_level(buf), 1, NULL,
8802                                        &flags);
8803                 if (ret < 0) {
8804                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8805                         if (ret < 0) {
8806                                 fprintf(stderr, "Couldn't calc extent flags\n");
8807                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8808                         }
8809                 }
8810         } else {
8811                 flags = 0;
8812                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8813                 if (ret < 0) {
8814                         fprintf(stderr, "Couldn't calc extent flags\n");
8815                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8816                 }
8817         }
8818
8819         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8820                 if (ri != NULL &&
8821                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8822                     ri->objectid == btrfs_header_owner(buf)) {
8823                         /*
8824                          * Ok we got to this block from it's original owner and
8825                          * we have FULL_BACKREF set.  Relocation can leave
8826                          * converted blocks over so this is altogether possible,
8827                          * however it's not possible if the generation > the
8828                          * last snapshot, so check for this case.
8829                          */
8830                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8831                             btrfs_header_generation(buf) > ri->last_snapshot) {
8832                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8833                                 rec->bad_full_backref = 1;
8834                         }
8835                 }
8836         } else {
8837                 if (ri != NULL &&
8838                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8839                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8840                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8841                         rec->bad_full_backref = 1;
8842                 }
8843         }
8844
8845         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8846                 rec->flag_block_full_backref = 1;
8847                 parent = bytenr;
8848                 owner = 0;
8849         } else {
8850                 rec->flag_block_full_backref = 0;
8851                 parent = 0;
8852                 owner = btrfs_header_owner(buf);
8853         }
8854
8855         ret = check_block(root, extent_cache, buf, flags);
8856         if (ret)
8857                 goto out;
8858
8859         if (btrfs_is_leaf(buf)) {
8860                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8861                 for (i = 0; i < nritems; i++) {
8862                         struct btrfs_file_extent_item *fi;
8863                         btrfs_item_key_to_cpu(buf, &key, i);
8864                         /*
8865                          * Check key type against the leaf owner.
8866                          * Could filter quite a lot of early error if
8867                          * owner is correct
8868                          */
8869                         if (check_type_with_root(btrfs_header_owner(buf),
8870                                                  key.type)) {
8871                                 fprintf(stderr, "ignoring invalid key\n");
8872                                 continue;
8873                         }
8874                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8875                                 process_extent_item(root, extent_cache, buf,
8876                                                     i);
8877                                 continue;
8878                         }
8879                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8880                                 process_extent_item(root, extent_cache, buf,
8881                                                     i);
8882                                 continue;
8883                         }
8884                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8885                                 total_csum_bytes +=
8886                                         btrfs_item_size_nr(buf, i);
8887                                 continue;
8888                         }
8889                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8890                                 process_chunk_item(chunk_cache, &key, buf, i);
8891                                 continue;
8892                         }
8893                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8894                                 process_device_item(dev_cache, &key, buf, i);
8895                                 continue;
8896                         }
8897                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8898                                 process_block_group_item(block_group_cache,
8899                                         &key, buf, i);
8900                                 continue;
8901                         }
8902                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8903                                 process_device_extent_item(dev_extent_cache,
8904                                         &key, buf, i);
8905                                 continue;
8906
8907                         }
8908                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8909 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8910                                 process_extent_ref_v0(extent_cache, buf, i);
8911 #else
8912                                 BUG();
8913 #endif
8914                                 continue;
8915                         }
8916
8917                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8918                                 ret = add_tree_backref(extent_cache,
8919                                                 key.objectid, 0, key.offset, 0);
8920                                 if (ret < 0)
8921                                         error(
8922                                 "add_tree_backref failed (leaf tree block): %s",
8923                                               strerror(-ret));
8924                                 continue;
8925                         }
8926                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8927                                 ret = add_tree_backref(extent_cache,
8928                                                 key.objectid, key.offset, 0, 0);
8929                                 if (ret < 0)
8930                                         error(
8931                                 "add_tree_backref failed (leaf shared block): %s",
8932                                               strerror(-ret));
8933                                 continue;
8934                         }
8935                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8936                                 struct btrfs_extent_data_ref *ref;
8937                                 ref = btrfs_item_ptr(buf, i,
8938                                                 struct btrfs_extent_data_ref);
8939                                 add_data_backref(extent_cache,
8940                                         key.objectid, 0,
8941                                         btrfs_extent_data_ref_root(buf, ref),
8942                                         btrfs_extent_data_ref_objectid(buf,
8943                                                                        ref),
8944                                         btrfs_extent_data_ref_offset(buf, ref),
8945                                         btrfs_extent_data_ref_count(buf, ref),
8946                                         0, root->fs_info->sectorsize);
8947                                 continue;
8948                         }
8949                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8950                                 struct btrfs_shared_data_ref *ref;
8951                                 ref = btrfs_item_ptr(buf, i,
8952                                                 struct btrfs_shared_data_ref);
8953                                 add_data_backref(extent_cache,
8954                                         key.objectid, key.offset, 0, 0, 0,
8955                                         btrfs_shared_data_ref_count(buf, ref),
8956                                         0, root->fs_info->sectorsize);
8957                                 continue;
8958                         }
8959                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8960                                 struct bad_item *bad;
8961
8962                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8963                                         continue;
8964                                 if (!owner)
8965                                         continue;
8966                                 bad = malloc(sizeof(struct bad_item));
8967                                 if (!bad)
8968                                         continue;
8969                                 INIT_LIST_HEAD(&bad->list);
8970                                 memcpy(&bad->key, &key,
8971                                        sizeof(struct btrfs_key));
8972                                 bad->root_id = owner;
8973                                 list_add_tail(&bad->list, &delete_items);
8974                                 continue;
8975                         }
8976                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8977                                 continue;
8978                         fi = btrfs_item_ptr(buf, i,
8979                                             struct btrfs_file_extent_item);
8980                         if (btrfs_file_extent_type(buf, fi) ==
8981                             BTRFS_FILE_EXTENT_INLINE)
8982                                 continue;
8983                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8984                                 continue;
8985
8986                         data_bytes_allocated +=
8987                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8988                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8989                                 abort();
8990                         }
8991                         data_bytes_referenced +=
8992                                 btrfs_file_extent_num_bytes(buf, fi);
8993                         add_data_backref(extent_cache,
8994                                 btrfs_file_extent_disk_bytenr(buf, fi),
8995                                 parent, owner, key.objectid, key.offset -
8996                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8997                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8998                 }
8999         } else {
9000                 int level;
9001                 struct btrfs_key first_key;
9002
9003                 first_key.objectid = 0;
9004
9005                 if (nritems > 0)
9006                         btrfs_item_key_to_cpu(buf, &first_key, 0);
9007                 level = btrfs_header_level(buf);
9008                 for (i = 0; i < nritems; i++) {
9009                         struct extent_record tmpl;
9010
9011                         ptr = btrfs_node_blockptr(buf, i);
9012                         size = root->fs_info->nodesize;
9013                         btrfs_node_key_to_cpu(buf, &key, i);
9014                         if (ri != NULL) {
9015                                 if ((level == ri->drop_level)
9016                                     && is_dropped_key(&key, &ri->drop_key)) {
9017                                         continue;
9018                                 }
9019                         }
9020
9021                         memset(&tmpl, 0, sizeof(tmpl));
9022                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9023                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9024                         tmpl.start = ptr;
9025                         tmpl.nr = size;
9026                         tmpl.refs = 1;
9027                         tmpl.metadata = 1;
9028                         tmpl.max_size = size;
9029                         ret = add_extent_rec(extent_cache, &tmpl);
9030                         if (ret < 0)
9031                                 goto out;
9032
9033                         ret = add_tree_backref(extent_cache, ptr, parent,
9034                                         owner, 1);
9035                         if (ret < 0) {
9036                                 error(
9037                                 "add_tree_backref failed (non-leaf block): %s",
9038                                       strerror(-ret));
9039                                 continue;
9040                         }
9041
9042                         if (level > 1) {
9043                                 add_pending(nodes, seen, ptr, size);
9044                         } else {
9045                                 add_pending(pending, seen, ptr, size);
9046                         }
9047                 }
9048                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9049                                       nritems) * sizeof(struct btrfs_key_ptr);
9050         }
9051         total_btree_bytes += buf->len;
9052         if (fs_root_objectid(btrfs_header_owner(buf)))
9053                 total_fs_tree_bytes += buf->len;
9054         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9055                 total_extent_tree_bytes += buf->len;
9056 out:
9057         free_extent_buffer(buf);
9058         return ret;
9059 }
9060
9061 static int add_root_to_pending(struct extent_buffer *buf,
9062                                struct cache_tree *extent_cache,
9063                                struct cache_tree *pending,
9064                                struct cache_tree *seen,
9065                                struct cache_tree *nodes,
9066                                u64 objectid)
9067 {
9068         struct extent_record tmpl;
9069         int ret;
9070
9071         if (btrfs_header_level(buf) > 0)
9072                 add_pending(nodes, seen, buf->start, buf->len);
9073         else
9074                 add_pending(pending, seen, buf->start, buf->len);
9075
9076         memset(&tmpl, 0, sizeof(tmpl));
9077         tmpl.start = buf->start;
9078         tmpl.nr = buf->len;
9079         tmpl.is_root = 1;
9080         tmpl.refs = 1;
9081         tmpl.metadata = 1;
9082         tmpl.max_size = buf->len;
9083         add_extent_rec(extent_cache, &tmpl);
9084
9085         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9086             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9087                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9088                                 0, 1);
9089         else
9090                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9091                                 1);
9092         return ret;
9093 }
9094
9095 /* as we fix the tree, we might be deleting blocks that
9096  * we're tracking for repair.  This hook makes sure we
9097  * remove any backrefs for blocks as we are fixing them.
9098  */
9099 static int free_extent_hook(struct btrfs_trans_handle *trans,
9100                             struct btrfs_root *root,
9101                             u64 bytenr, u64 num_bytes, u64 parent,
9102                             u64 root_objectid, u64 owner, u64 offset,
9103                             int refs_to_drop)
9104 {
9105         struct extent_record *rec;
9106         struct cache_extent *cache;
9107         int is_data;
9108         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9109
9110         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9111         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9112         if (!cache)
9113                 return 0;
9114
9115         rec = container_of(cache, struct extent_record, cache);
9116         if (is_data) {
9117                 struct data_backref *back;
9118                 back = find_data_backref(rec, parent, root_objectid, owner,
9119                                          offset, 1, bytenr, num_bytes);
9120                 if (!back)
9121                         goto out;
9122                 if (back->node.found_ref) {
9123                         back->found_ref -= refs_to_drop;
9124                         if (rec->refs)
9125                                 rec->refs -= refs_to_drop;
9126                 }
9127                 if (back->node.found_extent_tree) {
9128                         back->num_refs -= refs_to_drop;
9129                         if (rec->extent_item_refs)
9130                                 rec->extent_item_refs -= refs_to_drop;
9131                 }
9132                 if (back->found_ref == 0)
9133                         back->node.found_ref = 0;
9134                 if (back->num_refs == 0)
9135                         back->node.found_extent_tree = 0;
9136
9137                 if (!back->node.found_extent_tree && back->node.found_ref) {
9138                         rb_erase(&back->node.node, &rec->backref_tree);
9139                         free(back);
9140                 }
9141         } else {
9142                 struct tree_backref *back;
9143                 back = find_tree_backref(rec, parent, root_objectid);
9144                 if (!back)
9145                         goto out;
9146                 if (back->node.found_ref) {
9147                         if (rec->refs)
9148                                 rec->refs--;
9149                         back->node.found_ref = 0;
9150                 }
9151                 if (back->node.found_extent_tree) {
9152                         if (rec->extent_item_refs)
9153                                 rec->extent_item_refs--;
9154                         back->node.found_extent_tree = 0;
9155                 }
9156                 if (!back->node.found_extent_tree && back->node.found_ref) {
9157                         rb_erase(&back->node.node, &rec->backref_tree);
9158                         free(back);
9159                 }
9160         }
9161         maybe_free_extent_rec(extent_cache, rec);
9162 out:
9163         return 0;
9164 }
9165
9166 static int delete_extent_records(struct btrfs_trans_handle *trans,
9167                                  struct btrfs_root *root,
9168                                  struct btrfs_path *path,
9169                                  u64 bytenr)
9170 {
9171         struct btrfs_key key;
9172         struct btrfs_key found_key;
9173         struct extent_buffer *leaf;
9174         int ret;
9175         int slot;
9176
9177
9178         key.objectid = bytenr;
9179         key.type = (u8)-1;
9180         key.offset = (u64)-1;
9181
9182         while(1) {
9183                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9184                                         &key, path, 0, 1);
9185                 if (ret < 0)
9186                         break;
9187
9188                 if (ret > 0) {
9189                         ret = 0;
9190                         if (path->slots[0] == 0)
9191                                 break;
9192                         path->slots[0]--;
9193                 }
9194                 ret = 0;
9195
9196                 leaf = path->nodes[0];
9197                 slot = path->slots[0];
9198
9199                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9200                 if (found_key.objectid != bytenr)
9201                         break;
9202
9203                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9204                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9205                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9206                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9207                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9208                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9209                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9210                         btrfs_release_path(path);
9211                         if (found_key.type == 0) {
9212                                 if (found_key.offset == 0)
9213                                         break;
9214                                 key.offset = found_key.offset - 1;
9215                                 key.type = found_key.type;
9216                         }
9217                         key.type = found_key.type - 1;
9218                         key.offset = (u64)-1;
9219                         continue;
9220                 }
9221
9222                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9223                         found_key.objectid, found_key.type, found_key.offset);
9224
9225                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9226                 if (ret)
9227                         break;
9228                 btrfs_release_path(path);
9229
9230                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9231                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9232                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9233                                 found_key.offset : root->fs_info->nodesize;
9234
9235                         ret = btrfs_update_block_group(trans, root, bytenr,
9236                                                        bytes, 0, 0);
9237                         if (ret)
9238                                 break;
9239                 }
9240         }
9241
9242         btrfs_release_path(path);
9243         return ret;
9244 }
9245
9246 /*
9247  * for a single backref, this will allocate a new extent
9248  * and add the backref to it.
9249  */
9250 static int record_extent(struct btrfs_trans_handle *trans,
9251                          struct btrfs_fs_info *info,
9252                          struct btrfs_path *path,
9253                          struct extent_record *rec,
9254                          struct extent_backref *back,
9255                          int allocated, u64 flags)
9256 {
9257         int ret = 0;
9258         struct btrfs_root *extent_root = info->extent_root;
9259         struct extent_buffer *leaf;
9260         struct btrfs_key ins_key;
9261         struct btrfs_extent_item *ei;
9262         struct data_backref *dback;
9263         struct btrfs_tree_block_info *bi;
9264
9265         if (!back->is_data)
9266                 rec->max_size = max_t(u64, rec->max_size,
9267                                     info->nodesize);
9268
9269         if (!allocated) {
9270                 u32 item_size = sizeof(*ei);
9271
9272                 if (!back->is_data)
9273                         item_size += sizeof(*bi);
9274
9275                 ins_key.objectid = rec->start;
9276                 ins_key.offset = rec->max_size;
9277                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9278
9279                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9280                                         &ins_key, item_size);
9281                 if (ret)
9282                         goto fail;
9283
9284                 leaf = path->nodes[0];
9285                 ei = btrfs_item_ptr(leaf, path->slots[0],
9286                                     struct btrfs_extent_item);
9287
9288                 btrfs_set_extent_refs(leaf, ei, 0);
9289                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9290
9291                 if (back->is_data) {
9292                         btrfs_set_extent_flags(leaf, ei,
9293                                                BTRFS_EXTENT_FLAG_DATA);
9294                 } else {
9295                         struct btrfs_disk_key copy_key;;
9296
9297                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9298                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9299                                              sizeof(*bi));
9300
9301                         btrfs_set_disk_key_objectid(&copy_key,
9302                                                     rec->info_objectid);
9303                         btrfs_set_disk_key_type(&copy_key, 0);
9304                         btrfs_set_disk_key_offset(&copy_key, 0);
9305
9306                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9307                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9308
9309                         btrfs_set_extent_flags(leaf, ei,
9310                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9311                 }
9312
9313                 btrfs_mark_buffer_dirty(leaf);
9314                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9315                                                rec->max_size, 1, 0);
9316                 if (ret)
9317                         goto fail;
9318                 btrfs_release_path(path);
9319         }
9320
9321         if (back->is_data) {
9322                 u64 parent;
9323                 int i;
9324
9325                 dback = to_data_backref(back);
9326                 if (back->full_backref)
9327                         parent = dback->parent;
9328                 else
9329                         parent = 0;
9330
9331                 for (i = 0; i < dback->found_ref; i++) {
9332                         /* if parent != 0, we're doing a full backref
9333                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9334                          * just makes the backref allocator create a data
9335                          * backref
9336                          */
9337                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9338                                                    rec->start, rec->max_size,
9339                                                    parent,
9340                                                    dback->root,
9341                                                    parent ?
9342                                                    BTRFS_FIRST_FREE_OBJECTID :
9343                                                    dback->owner,
9344                                                    dback->offset);
9345                         if (ret)
9346                                 break;
9347                 }
9348                 fprintf(stderr, "adding new data backref"
9349                                 " on %llu %s %llu owner %llu"
9350                                 " offset %llu found %d\n",
9351                                 (unsigned long long)rec->start,
9352                                 back->full_backref ?
9353                                 "parent" : "root",
9354                                 back->full_backref ?
9355                                 (unsigned long long)parent :
9356                                 (unsigned long long)dback->root,
9357                                 (unsigned long long)dback->owner,
9358                                 (unsigned long long)dback->offset,
9359                                 dback->found_ref);
9360         } else {
9361                 u64 parent;
9362                 struct tree_backref *tback;
9363
9364                 tback = to_tree_backref(back);
9365                 if (back->full_backref)
9366                         parent = tback->parent;
9367                 else
9368                         parent = 0;
9369
9370                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9371                                            rec->start, rec->max_size,
9372                                            parent, tback->root, 0, 0);
9373                 fprintf(stderr, "adding new tree backref on "
9374                         "start %llu len %llu parent %llu root %llu\n",
9375                         rec->start, rec->max_size, parent, tback->root);
9376         }
9377 fail:
9378         btrfs_release_path(path);
9379         return ret;
9380 }
9381
9382 static struct extent_entry *find_entry(struct list_head *entries,
9383                                        u64 bytenr, u64 bytes)
9384 {
9385         struct extent_entry *entry = NULL;
9386
9387         list_for_each_entry(entry, entries, list) {
9388                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9389                         return entry;
9390         }
9391
9392         return NULL;
9393 }
9394
9395 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9396 {
9397         struct extent_entry *entry, *best = NULL, *prev = NULL;
9398
9399         list_for_each_entry(entry, entries, list) {
9400                 /*
9401                  * If there are as many broken entries as entries then we know
9402                  * not to trust this particular entry.
9403                  */
9404                 if (entry->broken == entry->count)
9405                         continue;
9406
9407                 /*
9408                  * Special case, when there are only two entries and 'best' is
9409                  * the first one
9410                  */
9411                 if (!prev) {
9412                         best = entry;
9413                         prev = entry;
9414                         continue;
9415                 }
9416
9417                 /*
9418                  * If our current entry == best then we can't be sure our best
9419                  * is really the best, so we need to keep searching.
9420                  */
9421                 if (best && best->count == entry->count) {
9422                         prev = entry;
9423                         best = NULL;
9424                         continue;
9425                 }
9426
9427                 /* Prev == entry, not good enough, have to keep searching */
9428                 if (!prev->broken && prev->count == entry->count)
9429                         continue;
9430
9431                 if (!best)
9432                         best = (prev->count > entry->count) ? prev : entry;
9433                 else if (best->count < entry->count)
9434                         best = entry;
9435                 prev = entry;
9436         }
9437
9438         return best;
9439 }
9440
9441 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9442                       struct data_backref *dback, struct extent_entry *entry)
9443 {
9444         struct btrfs_trans_handle *trans;
9445         struct btrfs_root *root;
9446         struct btrfs_file_extent_item *fi;
9447         struct extent_buffer *leaf;
9448         struct btrfs_key key;
9449         u64 bytenr, bytes;
9450         int ret, err;
9451
9452         key.objectid = dback->root;
9453         key.type = BTRFS_ROOT_ITEM_KEY;
9454         key.offset = (u64)-1;
9455         root = btrfs_read_fs_root(info, &key);
9456         if (IS_ERR(root)) {
9457                 fprintf(stderr, "Couldn't find root for our ref\n");
9458                 return -EINVAL;
9459         }
9460
9461         /*
9462          * The backref points to the original offset of the extent if it was
9463          * split, so we need to search down to the offset we have and then walk
9464          * forward until we find the backref we're looking for.
9465          */
9466         key.objectid = dback->owner;
9467         key.type = BTRFS_EXTENT_DATA_KEY;
9468         key.offset = dback->offset;
9469         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9470         if (ret < 0) {
9471                 fprintf(stderr, "Error looking up ref %d\n", ret);
9472                 return ret;
9473         }
9474
9475         while (1) {
9476                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9477                         ret = btrfs_next_leaf(root, path);
9478                         if (ret) {
9479                                 fprintf(stderr, "Couldn't find our ref, next\n");
9480                                 return -EINVAL;
9481                         }
9482                 }
9483                 leaf = path->nodes[0];
9484                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9485                 if (key.objectid != dback->owner ||
9486                     key.type != BTRFS_EXTENT_DATA_KEY) {
9487                         fprintf(stderr, "Couldn't find our ref, search\n");
9488                         return -EINVAL;
9489                 }
9490                 fi = btrfs_item_ptr(leaf, path->slots[0],
9491                                     struct btrfs_file_extent_item);
9492                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9493                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9494
9495                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9496                         break;
9497                 path->slots[0]++;
9498         }
9499
9500         btrfs_release_path(path);
9501
9502         trans = btrfs_start_transaction(root, 1);
9503         if (IS_ERR(trans))
9504                 return PTR_ERR(trans);
9505
9506         /*
9507          * Ok we have the key of the file extent we want to fix, now we can cow
9508          * down to the thing and fix it.
9509          */
9510         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9511         if (ret < 0) {
9512                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9513                         key.objectid, key.type, key.offset, ret);
9514                 goto out;
9515         }
9516         if (ret > 0) {
9517                 fprintf(stderr, "Well that's odd, we just found this key "
9518                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9519                         key.offset);
9520                 ret = -EINVAL;
9521                 goto out;
9522         }
9523         leaf = path->nodes[0];
9524         fi = btrfs_item_ptr(leaf, path->slots[0],
9525                             struct btrfs_file_extent_item);
9526
9527         if (btrfs_file_extent_compression(leaf, fi) &&
9528             dback->disk_bytenr != entry->bytenr) {
9529                 fprintf(stderr, "Ref doesn't match the record start and is "
9530                         "compressed, please take a btrfs-image of this file "
9531                         "system and send it to a btrfs developer so they can "
9532                         "complete this functionality for bytenr %Lu\n",
9533                         dback->disk_bytenr);
9534                 ret = -EINVAL;
9535                 goto out;
9536         }
9537
9538         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9539                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9540         } else if (dback->disk_bytenr > entry->bytenr) {
9541                 u64 off_diff, offset;
9542
9543                 off_diff = dback->disk_bytenr - entry->bytenr;
9544                 offset = btrfs_file_extent_offset(leaf, fi);
9545                 if (dback->disk_bytenr + offset +
9546                     btrfs_file_extent_num_bytes(leaf, fi) >
9547                     entry->bytenr + entry->bytes) {
9548                         fprintf(stderr, "Ref is past the entry end, please "
9549                                 "take a btrfs-image of this file system and "
9550                                 "send it to a btrfs developer, ref %Lu\n",
9551                                 dback->disk_bytenr);
9552                         ret = -EINVAL;
9553                         goto out;
9554                 }
9555                 offset += off_diff;
9556                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9557                 btrfs_set_file_extent_offset(leaf, fi, offset);
9558         } else if (dback->disk_bytenr < entry->bytenr) {
9559                 u64 offset;
9560
9561                 offset = btrfs_file_extent_offset(leaf, fi);
9562                 if (dback->disk_bytenr + offset < entry->bytenr) {
9563                         fprintf(stderr, "Ref is before the entry start, please"
9564                                 " take a btrfs-image of this file system and "
9565                                 "send it to a btrfs developer, ref %Lu\n",
9566                                 dback->disk_bytenr);
9567                         ret = -EINVAL;
9568                         goto out;
9569                 }
9570
9571                 offset += dback->disk_bytenr;
9572                 offset -= entry->bytenr;
9573                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9574                 btrfs_set_file_extent_offset(leaf, fi, offset);
9575         }
9576
9577         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9578
9579         /*
9580          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9581          * only do this if we aren't using compression, otherwise it's a
9582          * trickier case.
9583          */
9584         if (!btrfs_file_extent_compression(leaf, fi))
9585                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9586         else
9587                 printf("ram bytes may be wrong?\n");
9588         btrfs_mark_buffer_dirty(leaf);
9589 out:
9590         err = btrfs_commit_transaction(trans, root);
9591         btrfs_release_path(path);
9592         return ret ? ret : err;
9593 }
9594
9595 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9596                            struct extent_record *rec)
9597 {
9598         struct extent_backref *back, *tmp;
9599         struct data_backref *dback;
9600         struct extent_entry *entry, *best = NULL;
9601         LIST_HEAD(entries);
9602         int nr_entries = 0;
9603         int broken_entries = 0;
9604         int ret = 0;
9605         short mismatch = 0;
9606
9607         /*
9608          * Metadata is easy and the backrefs should always agree on bytenr and
9609          * size, if not we've got bigger issues.
9610          */
9611         if (rec->metadata)
9612                 return 0;
9613
9614         rbtree_postorder_for_each_entry_safe(back, tmp,
9615                                              &rec->backref_tree, node) {
9616                 if (back->full_backref || !back->is_data)
9617                         continue;
9618
9619                 dback = to_data_backref(back);
9620
9621                 /*
9622                  * We only pay attention to backrefs that we found a real
9623                  * backref for.
9624                  */
9625                 if (dback->found_ref == 0)
9626                         continue;
9627
9628                 /*
9629                  * For now we only catch when the bytes don't match, not the
9630                  * bytenr.  We can easily do this at the same time, but I want
9631                  * to have a fs image to test on before we just add repair
9632                  * functionality willy-nilly so we know we won't screw up the
9633                  * repair.
9634                  */
9635
9636                 entry = find_entry(&entries, dback->disk_bytenr,
9637                                    dback->bytes);
9638                 if (!entry) {
9639                         entry = malloc(sizeof(struct extent_entry));
9640                         if (!entry) {
9641                                 ret = -ENOMEM;
9642                                 goto out;
9643                         }
9644                         memset(entry, 0, sizeof(*entry));
9645                         entry->bytenr = dback->disk_bytenr;
9646                         entry->bytes = dback->bytes;
9647                         list_add_tail(&entry->list, &entries);
9648                         nr_entries++;
9649                 }
9650
9651                 /*
9652                  * If we only have on entry we may think the entries agree when
9653                  * in reality they don't so we have to do some extra checking.
9654                  */
9655                 if (dback->disk_bytenr != rec->start ||
9656                     dback->bytes != rec->nr || back->broken)
9657                         mismatch = 1;
9658
9659                 if (back->broken) {
9660                         entry->broken++;
9661                         broken_entries++;
9662                 }
9663
9664                 entry->count++;
9665         }
9666
9667         /* Yay all the backrefs agree, carry on good sir */
9668         if (nr_entries <= 1 && !mismatch)
9669                 goto out;
9670
9671         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9672                 "%Lu\n", rec->start);
9673
9674         /*
9675          * First we want to see if the backrefs can agree amongst themselves who
9676          * is right, so figure out which one of the entries has the highest
9677          * count.
9678          */
9679         best = find_most_right_entry(&entries);
9680
9681         /*
9682          * Ok so we may have an even split between what the backrefs think, so
9683          * this is where we use the extent ref to see what it thinks.
9684          */
9685         if (!best) {
9686                 entry = find_entry(&entries, rec->start, rec->nr);
9687                 if (!entry && (!broken_entries || !rec->found_rec)) {
9688                         fprintf(stderr, "Backrefs don't agree with each other "
9689                                 "and extent record doesn't agree with anybody,"
9690                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9691                                 rec->start, rec->nr);
9692                         ret = -EINVAL;
9693                         goto out;
9694                 } else if (!entry) {
9695                         /*
9696                          * Ok our backrefs were broken, we'll assume this is the
9697                          * correct value and add an entry for this range.
9698                          */
9699                         entry = malloc(sizeof(struct extent_entry));
9700                         if (!entry) {
9701                                 ret = -ENOMEM;
9702                                 goto out;
9703                         }
9704                         memset(entry, 0, sizeof(*entry));
9705                         entry->bytenr = rec->start;
9706                         entry->bytes = rec->nr;
9707                         list_add_tail(&entry->list, &entries);
9708                         nr_entries++;
9709                 }
9710                 entry->count++;
9711                 best = find_most_right_entry(&entries);
9712                 if (!best) {
9713                         fprintf(stderr, "Backrefs and extent record evenly "
9714                                 "split on who is right, this is going to "
9715                                 "require user input to fix bytenr %Lu bytes "
9716                                 "%Lu\n", rec->start, rec->nr);
9717                         ret = -EINVAL;
9718                         goto out;
9719                 }
9720         }
9721
9722         /*
9723          * I don't think this can happen currently as we'll abort() if we catch
9724          * this case higher up, but in case somebody removes that we still can't
9725          * deal with it properly here yet, so just bail out of that's the case.
9726          */
9727         if (best->bytenr != rec->start) {
9728                 fprintf(stderr, "Extent start and backref starts don't match, "
9729                         "please use btrfs-image on this file system and send "
9730                         "it to a btrfs developer so they can make fsck fix "
9731                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9732                         rec->start, rec->nr);
9733                 ret = -EINVAL;
9734                 goto out;
9735         }
9736
9737         /*
9738          * Ok great we all agreed on an extent record, let's go find the real
9739          * references and fix up the ones that don't match.
9740          */
9741         rbtree_postorder_for_each_entry_safe(back, tmp,
9742                                              &rec->backref_tree, node) {
9743                 if (back->full_backref || !back->is_data)
9744                         continue;
9745
9746                 dback = to_data_backref(back);
9747
9748                 /*
9749                  * Still ignoring backrefs that don't have a real ref attached
9750                  * to them.
9751                  */
9752                 if (dback->found_ref == 0)
9753                         continue;
9754
9755                 if (dback->bytes == best->bytes &&
9756                     dback->disk_bytenr == best->bytenr)
9757                         continue;
9758
9759                 ret = repair_ref(info, path, dback, best);
9760                 if (ret)
9761                         goto out;
9762         }
9763
9764         /*
9765          * Ok we messed with the actual refs, which means we need to drop our
9766          * entire cache and go back and rescan.  I know this is a huge pain and
9767          * adds a lot of extra work, but it's the only way to be safe.  Once all
9768          * the backrefs agree we may not need to do anything to the extent
9769          * record itself.
9770          */
9771         ret = -EAGAIN;
9772 out:
9773         while (!list_empty(&entries)) {
9774                 entry = list_entry(entries.next, struct extent_entry, list);
9775                 list_del_init(&entry->list);
9776                 free(entry);
9777         }
9778         return ret;
9779 }
9780
9781 static int process_duplicates(struct cache_tree *extent_cache,
9782                               struct extent_record *rec)
9783 {
9784         struct extent_record *good, *tmp;
9785         struct cache_extent *cache;
9786         int ret;
9787
9788         /*
9789          * If we found a extent record for this extent then return, or if we
9790          * have more than one duplicate we are likely going to need to delete
9791          * something.
9792          */
9793         if (rec->found_rec || rec->num_duplicates > 1)
9794                 return 0;
9795
9796         /* Shouldn't happen but just in case */
9797         BUG_ON(!rec->num_duplicates);
9798
9799         /*
9800          * So this happens if we end up with a backref that doesn't match the
9801          * actual extent entry.  So either the backref is bad or the extent
9802          * entry is bad.  Either way we want to have the extent_record actually
9803          * reflect what we found in the extent_tree, so we need to take the
9804          * duplicate out and use that as the extent_record since the only way we
9805          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9806          */
9807         remove_cache_extent(extent_cache, &rec->cache);
9808
9809         good = to_extent_record(rec->dups.next);
9810         list_del_init(&good->list);
9811         INIT_LIST_HEAD(&good->backrefs);
9812         INIT_LIST_HEAD(&good->dups);
9813         good->cache.start = good->start;
9814         good->cache.size = good->nr;
9815         good->content_checked = 0;
9816         good->owner_ref_checked = 0;
9817         good->num_duplicates = 0;
9818         good->refs = rec->refs;
9819         list_splice_init(&rec->backrefs, &good->backrefs);
9820         while (1) {
9821                 cache = lookup_cache_extent(extent_cache, good->start,
9822                                             good->nr);
9823                 if (!cache)
9824                         break;
9825                 tmp = container_of(cache, struct extent_record, cache);
9826
9827                 /*
9828                  * If we find another overlapping extent and it's found_rec is
9829                  * set then it's a duplicate and we need to try and delete
9830                  * something.
9831                  */
9832                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9833                         if (list_empty(&good->list))
9834                                 list_add_tail(&good->list,
9835                                               &duplicate_extents);
9836                         good->num_duplicates += tmp->num_duplicates + 1;
9837                         list_splice_init(&tmp->dups, &good->dups);
9838                         list_del_init(&tmp->list);
9839                         list_add_tail(&tmp->list, &good->dups);
9840                         remove_cache_extent(extent_cache, &tmp->cache);
9841                         continue;
9842                 }
9843
9844                 /*
9845                  * Ok we have another non extent item backed extent rec, so lets
9846                  * just add it to this extent and carry on like we did above.
9847                  */
9848                 good->refs += tmp->refs;
9849                 list_splice_init(&tmp->backrefs, &good->backrefs);
9850                 remove_cache_extent(extent_cache, &tmp->cache);
9851                 free(tmp);
9852         }
9853         ret = insert_cache_extent(extent_cache, &good->cache);
9854         BUG_ON(ret);
9855         free(rec);
9856         return good->num_duplicates ? 0 : 1;
9857 }
9858
9859 static int delete_duplicate_records(struct btrfs_root *root,
9860                                     struct extent_record *rec)
9861 {
9862         struct btrfs_trans_handle *trans;
9863         LIST_HEAD(delete_list);
9864         struct btrfs_path path;
9865         struct extent_record *tmp, *good, *n;
9866         int nr_del = 0;
9867         int ret = 0, err;
9868         struct btrfs_key key;
9869
9870         btrfs_init_path(&path);
9871
9872         good = rec;
9873         /* Find the record that covers all of the duplicates. */
9874         list_for_each_entry(tmp, &rec->dups, list) {
9875                 if (good->start < tmp->start)
9876                         continue;
9877                 if (good->nr > tmp->nr)
9878                         continue;
9879
9880                 if (tmp->start + tmp->nr < good->start + good->nr) {
9881                         fprintf(stderr, "Ok we have overlapping extents that "
9882                                 "aren't completely covered by each other, this "
9883                                 "is going to require more careful thought.  "
9884                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9885                                 tmp->start, tmp->nr, good->start, good->nr);
9886                         abort();
9887                 }
9888                 good = tmp;
9889         }
9890
9891         if (good != rec)
9892                 list_add_tail(&rec->list, &delete_list);
9893
9894         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9895                 if (tmp == good)
9896                         continue;
9897                 list_move_tail(&tmp->list, &delete_list);
9898         }
9899
9900         root = root->fs_info->extent_root;
9901         trans = btrfs_start_transaction(root, 1);
9902         if (IS_ERR(trans)) {
9903                 ret = PTR_ERR(trans);
9904                 goto out;
9905         }
9906
9907         list_for_each_entry(tmp, &delete_list, list) {
9908                 if (tmp->found_rec == 0)
9909                         continue;
9910                 key.objectid = tmp->start;
9911                 key.type = BTRFS_EXTENT_ITEM_KEY;
9912                 key.offset = tmp->nr;
9913
9914                 /* Shouldn't happen but just in case */
9915                 if (tmp->metadata) {
9916                         fprintf(stderr, "Well this shouldn't happen, extent "
9917                                 "record overlaps but is metadata? "
9918                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9919                         abort();
9920                 }
9921
9922                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9923                 if (ret) {
9924                         if (ret > 0)
9925                                 ret = -EINVAL;
9926                         break;
9927                 }
9928                 ret = btrfs_del_item(trans, root, &path);
9929                 if (ret)
9930                         break;
9931                 btrfs_release_path(&path);
9932                 nr_del++;
9933         }
9934         err = btrfs_commit_transaction(trans, root);
9935         if (err && !ret)
9936                 ret = err;
9937 out:
9938         while (!list_empty(&delete_list)) {
9939                 tmp = to_extent_record(delete_list.next);
9940                 list_del_init(&tmp->list);
9941                 if (tmp == rec)
9942                         continue;
9943                 free(tmp);
9944         }
9945
9946         while (!list_empty(&rec->dups)) {
9947                 tmp = to_extent_record(rec->dups.next);
9948                 list_del_init(&tmp->list);
9949                 free(tmp);
9950         }
9951
9952         btrfs_release_path(&path);
9953
9954         if (!ret && !nr_del)
9955                 rec->num_duplicates = 0;
9956
9957         return ret ? ret : nr_del;
9958 }
9959
9960 static int find_possible_backrefs(struct btrfs_fs_info *info,
9961                                   struct btrfs_path *path,
9962                                   struct cache_tree *extent_cache,
9963                                   struct extent_record *rec)
9964 {
9965         struct btrfs_root *root;
9966         struct extent_backref *back, *tmp;
9967         struct data_backref *dback;
9968         struct cache_extent *cache;
9969         struct btrfs_file_extent_item *fi;
9970         struct btrfs_key key;
9971         u64 bytenr, bytes;
9972         int ret;
9973
9974         rbtree_postorder_for_each_entry_safe(back, tmp,
9975                                              &rec->backref_tree, node) {
9976                 /* Don't care about full backrefs (poor unloved backrefs) */
9977                 if (back->full_backref || !back->is_data)
9978                         continue;
9979
9980                 dback = to_data_backref(back);
9981
9982                 /* We found this one, we don't need to do a lookup */
9983                 if (dback->found_ref)
9984                         continue;
9985
9986                 key.objectid = dback->root;
9987                 key.type = BTRFS_ROOT_ITEM_KEY;
9988                 key.offset = (u64)-1;
9989
9990                 root = btrfs_read_fs_root(info, &key);
9991
9992                 /* No root, definitely a bad ref, skip */
9993                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9994                         continue;
9995                 /* Other err, exit */
9996                 if (IS_ERR(root))
9997                         return PTR_ERR(root);
9998
9999                 key.objectid = dback->owner;
10000                 key.type = BTRFS_EXTENT_DATA_KEY;
10001                 key.offset = dback->offset;
10002                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10003                 if (ret) {
10004                         btrfs_release_path(path);
10005                         if (ret < 0)
10006                                 return ret;
10007                         /* Didn't find it, we can carry on */
10008                         ret = 0;
10009                         continue;
10010                 }
10011
10012                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10013                                     struct btrfs_file_extent_item);
10014                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10015                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10016                 btrfs_release_path(path);
10017                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10018                 if (cache) {
10019                         struct extent_record *tmp;
10020                         tmp = container_of(cache, struct extent_record, cache);
10021
10022                         /*
10023                          * If we found an extent record for the bytenr for this
10024                          * particular backref then we can't add it to our
10025                          * current extent record.  We only want to add backrefs
10026                          * that don't have a corresponding extent item in the
10027                          * extent tree since they likely belong to this record
10028                          * and we need to fix it if it doesn't match bytenrs.
10029                          */
10030                         if  (tmp->found_rec)
10031                                 continue;
10032                 }
10033
10034                 dback->found_ref += 1;
10035                 dback->disk_bytenr = bytenr;
10036                 dback->bytes = bytes;
10037
10038                 /*
10039                  * Set this so the verify backref code knows not to trust the
10040                  * values in this backref.
10041                  */
10042                 back->broken = 1;
10043         }
10044
10045         return 0;
10046 }
10047
10048 /*
10049  * Record orphan data ref into corresponding root.
10050  *
10051  * Return 0 if the extent item contains data ref and recorded.
10052  * Return 1 if the extent item contains no useful data ref
10053  *   On that case, it may contains only shared_dataref or metadata backref
10054  *   or the file extent exists(this should be handled by the extent bytenr
10055  *   recovery routine)
10056  * Return <0 if something goes wrong.
10057  */
10058 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10059                                       struct extent_record *rec)
10060 {
10061         struct btrfs_key key;
10062         struct btrfs_root *dest_root;
10063         struct extent_backref *back, *tmp;
10064         struct data_backref *dback;
10065         struct orphan_data_extent *orphan;
10066         struct btrfs_path path;
10067         int recorded_data_ref = 0;
10068         int ret = 0;
10069
10070         if (rec->metadata)
10071                 return 1;
10072         btrfs_init_path(&path);
10073         rbtree_postorder_for_each_entry_safe(back, tmp,
10074                                              &rec->backref_tree, node) {
10075                 if (back->full_backref || !back->is_data ||
10076                     !back->found_extent_tree)
10077                         continue;
10078                 dback = to_data_backref(back);
10079                 if (dback->found_ref)
10080                         continue;
10081                 key.objectid = dback->root;
10082                 key.type = BTRFS_ROOT_ITEM_KEY;
10083                 key.offset = (u64)-1;
10084
10085                 dest_root = btrfs_read_fs_root(fs_info, &key);
10086
10087                 /* For non-exist root we just skip it */
10088                 if (IS_ERR(dest_root) || !dest_root)
10089                         continue;
10090
10091                 key.objectid = dback->owner;
10092                 key.type = BTRFS_EXTENT_DATA_KEY;
10093                 key.offset = dback->offset;
10094
10095                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10096                 btrfs_release_path(&path);
10097                 /*
10098                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10099                  * we need to record it for inode/file extent rebuild.
10100                  * For ret > 0, we record it only for file extent rebuild.
10101                  * For ret == 0, the file extent exists but only bytenr
10102                  * mismatch, let the original bytenr fix routine to handle,
10103                  * don't record it.
10104                  */
10105                 if (ret == 0)
10106                         continue;
10107                 ret = 0;
10108                 orphan = malloc(sizeof(*orphan));
10109                 if (!orphan) {
10110                         ret = -ENOMEM;
10111                         goto out;
10112                 }
10113                 INIT_LIST_HEAD(&orphan->list);
10114                 orphan->root = dback->root;
10115                 orphan->objectid = dback->owner;
10116                 orphan->offset = dback->offset;
10117                 orphan->disk_bytenr = rec->cache.start;
10118                 orphan->disk_len = rec->cache.size;
10119                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10120                 recorded_data_ref = 1;
10121         }
10122 out:
10123         btrfs_release_path(&path);
10124         if (!ret)
10125                 return !recorded_data_ref;
10126         else
10127                 return ret;
10128 }
10129
10130 /*
10131  * when an incorrect extent item is found, this will delete
10132  * all of the existing entries for it and recreate them
10133  * based on what the tree scan found.
10134  */
10135 static int fixup_extent_refs(struct btrfs_fs_info *info,
10136                              struct cache_tree *extent_cache,
10137                              struct extent_record *rec)
10138 {
10139         struct btrfs_trans_handle *trans = NULL;
10140         int ret;
10141         struct btrfs_path path;
10142         struct cache_extent *cache;
10143         struct extent_backref *back, *tmp;
10144         int allocated = 0;
10145         u64 flags = 0;
10146
10147         if (rec->flag_block_full_backref)
10148                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10149
10150         btrfs_init_path(&path);
10151         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10152                 /*
10153                  * Sometimes the backrefs themselves are so broken they don't
10154                  * get attached to any meaningful rec, so first go back and
10155                  * check any of our backrefs that we couldn't find and throw
10156                  * them into the list if we find the backref so that
10157                  * verify_backrefs can figure out what to do.
10158                  */
10159                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10160                 if (ret < 0)
10161                         goto out;
10162         }
10163
10164         /* step one, make sure all of the backrefs agree */
10165         ret = verify_backrefs(info, &path, rec);
10166         if (ret < 0)
10167                 goto out;
10168
10169         trans = btrfs_start_transaction(info->extent_root, 1);
10170         if (IS_ERR(trans)) {
10171                 ret = PTR_ERR(trans);
10172                 goto out;
10173         }
10174
10175         /* step two, delete all the existing records */
10176         ret = delete_extent_records(trans, info->extent_root, &path,
10177                                     rec->start);
10178
10179         if (ret < 0)
10180                 goto out;
10181
10182         /* was this block corrupt?  If so, don't add references to it */
10183         cache = lookup_cache_extent(info->corrupt_blocks,
10184                                     rec->start, rec->max_size);
10185         if (cache) {
10186                 ret = 0;
10187                 goto out;
10188         }
10189
10190         /* step three, recreate all the refs we did find */
10191         rbtree_postorder_for_each_entry_safe(back, tmp,
10192                                              &rec->backref_tree, node) {
10193                 /*
10194                  * if we didn't find any references, don't create a
10195                  * new extent record
10196                  */
10197                 if (!back->found_ref)
10198                         continue;
10199
10200                 rec->bad_full_backref = 0;
10201                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10202                 allocated = 1;
10203
10204                 if (ret)
10205                         goto out;
10206         }
10207 out:
10208         if (trans) {
10209                 int err = btrfs_commit_transaction(trans, info->extent_root);
10210                 if (!ret)
10211                         ret = err;
10212         }
10213
10214         if (!ret)
10215                 fprintf(stderr, "Repaired extent references for %llu\n",
10216                                 (unsigned long long)rec->start);
10217
10218         btrfs_release_path(&path);
10219         return ret;
10220 }
10221
10222 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10223                               struct extent_record *rec)
10224 {
10225         struct btrfs_trans_handle *trans;
10226         struct btrfs_root *root = fs_info->extent_root;
10227         struct btrfs_path path;
10228         struct btrfs_extent_item *ei;
10229         struct btrfs_key key;
10230         u64 flags;
10231         int ret = 0;
10232
10233         key.objectid = rec->start;
10234         if (rec->metadata) {
10235                 key.type = BTRFS_METADATA_ITEM_KEY;
10236                 key.offset = rec->info_level;
10237         } else {
10238                 key.type = BTRFS_EXTENT_ITEM_KEY;
10239                 key.offset = rec->max_size;
10240         }
10241
10242         trans = btrfs_start_transaction(root, 0);
10243         if (IS_ERR(trans))
10244                 return PTR_ERR(trans);
10245
10246         btrfs_init_path(&path);
10247         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10248         if (ret < 0) {
10249                 btrfs_release_path(&path);
10250                 btrfs_commit_transaction(trans, root);
10251                 return ret;
10252         } else if (ret) {
10253                 fprintf(stderr, "Didn't find extent for %llu\n",
10254                         (unsigned long long)rec->start);
10255                 btrfs_release_path(&path);
10256                 btrfs_commit_transaction(trans, root);
10257                 return -ENOENT;
10258         }
10259
10260         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10261                             struct btrfs_extent_item);
10262         flags = btrfs_extent_flags(path.nodes[0], ei);
10263         if (rec->flag_block_full_backref) {
10264                 fprintf(stderr, "setting full backref on %llu\n",
10265                         (unsigned long long)key.objectid);
10266                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10267         } else {
10268                 fprintf(stderr, "clearing full backref on %llu\n",
10269                         (unsigned long long)key.objectid);
10270                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10271         }
10272         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10273         btrfs_mark_buffer_dirty(path.nodes[0]);
10274         btrfs_release_path(&path);
10275         ret = btrfs_commit_transaction(trans, root);
10276         if (!ret)
10277                 fprintf(stderr, "Repaired extent flags for %llu\n",
10278                                 (unsigned long long)rec->start);
10279
10280         return ret;
10281 }
10282
10283 /* right now we only prune from the extent allocation tree */
10284 static int prune_one_block(struct btrfs_trans_handle *trans,
10285                            struct btrfs_fs_info *info,
10286                            struct btrfs_corrupt_block *corrupt)
10287 {
10288         int ret;
10289         struct btrfs_path path;
10290         struct extent_buffer *eb;
10291         u64 found;
10292         int slot;
10293         int nritems;
10294         int level = corrupt->level + 1;
10295
10296         btrfs_init_path(&path);
10297 again:
10298         /* we want to stop at the parent to our busted block */
10299         path.lowest_level = level;
10300
10301         ret = btrfs_search_slot(trans, info->extent_root,
10302                                 &corrupt->key, &path, -1, 1);
10303
10304         if (ret < 0)
10305                 goto out;
10306
10307         eb = path.nodes[level];
10308         if (!eb) {
10309                 ret = -ENOENT;
10310                 goto out;
10311         }
10312
10313         /*
10314          * hopefully the search gave us the block we want to prune,
10315          * lets try that first
10316          */
10317         slot = path.slots[level];
10318         found =  btrfs_node_blockptr(eb, slot);
10319         if (found == corrupt->cache.start)
10320                 goto del_ptr;
10321
10322         nritems = btrfs_header_nritems(eb);
10323
10324         /* the search failed, lets scan this node and hope we find it */
10325         for (slot = 0; slot < nritems; slot++) {
10326                 found =  btrfs_node_blockptr(eb, slot);
10327                 if (found == corrupt->cache.start)
10328                         goto del_ptr;
10329         }
10330         /*
10331          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10332          * to this block
10333          */
10334         if (eb == info->extent_root->node) {
10335                 ret = -ENOENT;
10336                 goto out;
10337         } else {
10338                 level++;
10339                 btrfs_release_path(&path);
10340                 goto again;
10341         }
10342
10343 del_ptr:
10344         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10345         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10346
10347 out:
10348         btrfs_release_path(&path);
10349         return ret;
10350 }
10351
10352 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10353 {
10354         struct btrfs_trans_handle *trans = NULL;
10355         struct cache_extent *cache;
10356         struct btrfs_corrupt_block *corrupt;
10357
10358         while (1) {
10359                 cache = search_cache_extent(info->corrupt_blocks, 0);
10360                 if (!cache)
10361                         break;
10362                 if (!trans) {
10363                         trans = btrfs_start_transaction(info->extent_root, 1);
10364                         if (IS_ERR(trans))
10365                                 return PTR_ERR(trans);
10366                 }
10367                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10368                 prune_one_block(trans, info, corrupt);
10369                 remove_cache_extent(info->corrupt_blocks, cache);
10370         }
10371         if (trans)
10372                 return btrfs_commit_transaction(trans, info->extent_root);
10373         return 0;
10374 }
10375
10376 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10377 {
10378         struct btrfs_block_group_cache *cache;
10379         u64 start, end;
10380         int ret;
10381
10382         while (1) {
10383                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10384                                             &start, &end, EXTENT_DIRTY);
10385                 if (ret)
10386                         break;
10387                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10388         }
10389
10390         start = 0;
10391         while (1) {
10392                 cache = btrfs_lookup_first_block_group(fs_info, start);
10393                 if (!cache)
10394                         break;
10395                 if (cache->cached)
10396                         cache->cached = 0;
10397                 start = cache->key.objectid + cache->key.offset;
10398         }
10399 }
10400
10401 static int check_extent_refs(struct btrfs_root *root,
10402                              struct cache_tree *extent_cache)
10403 {
10404         struct extent_record *rec;
10405         struct cache_extent *cache;
10406         int ret = 0;
10407         int had_dups = 0;
10408
10409         if (repair) {
10410                 /*
10411                  * if we're doing a repair, we have to make sure
10412                  * we don't allocate from the problem extents.
10413                  * In the worst case, this will be all the
10414                  * extents in the FS
10415                  */
10416                 cache = search_cache_extent(extent_cache, 0);
10417                 while(cache) {
10418                         rec = container_of(cache, struct extent_record, cache);
10419                         set_extent_dirty(root->fs_info->excluded_extents,
10420                                          rec->start,
10421                                          rec->start + rec->max_size - 1);
10422                         cache = next_cache_extent(cache);
10423                 }
10424
10425                 /* pin down all the corrupted blocks too */
10426                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10427                 while(cache) {
10428                         set_extent_dirty(root->fs_info->excluded_extents,
10429                                          cache->start,
10430                                          cache->start + cache->size - 1);
10431                         cache = next_cache_extent(cache);
10432                 }
10433                 prune_corrupt_blocks(root->fs_info);
10434                 reset_cached_block_groups(root->fs_info);
10435         }
10436
10437         reset_cached_block_groups(root->fs_info);
10438
10439         /*
10440          * We need to delete any duplicate entries we find first otherwise we
10441          * could mess up the extent tree when we have backrefs that actually
10442          * belong to a different extent item and not the weird duplicate one.
10443          */
10444         while (repair && !list_empty(&duplicate_extents)) {
10445                 rec = to_extent_record(duplicate_extents.next);
10446                 list_del_init(&rec->list);
10447
10448                 /* Sometimes we can find a backref before we find an actual
10449                  * extent, so we need to process it a little bit to see if there
10450                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10451                  * if this is a backref screwup.  If we need to delete stuff
10452                  * process_duplicates() will return 0, otherwise it will return
10453                  * 1 and we
10454                  */
10455                 if (process_duplicates(extent_cache, rec))
10456                         continue;
10457                 ret = delete_duplicate_records(root, rec);
10458                 if (ret < 0)
10459                         return ret;
10460                 /*
10461                  * delete_duplicate_records will return the number of entries
10462                  * deleted, so if it's greater than 0 then we know we actually
10463                  * did something and we need to remove.
10464                  */
10465                 if (ret)
10466                         had_dups = 1;
10467         }
10468
10469         if (had_dups)
10470                 return -EAGAIN;
10471
10472         while(1) {
10473                 int cur_err = 0;
10474                 int fix = 0;
10475
10476                 cache = search_cache_extent(extent_cache, 0);
10477                 if (!cache)
10478                         break;
10479                 rec = container_of(cache, struct extent_record, cache);
10480                 if (rec->num_duplicates) {
10481                         fprintf(stderr, "extent item %llu has multiple extent "
10482                                 "items\n", (unsigned long long)rec->start);
10483                         cur_err = 1;
10484                 }
10485
10486                 if (rec->refs != rec->extent_item_refs) {
10487                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10488                                 (unsigned long long)rec->start,
10489                                 (unsigned long long)rec->nr);
10490                         fprintf(stderr, "extent item %llu, found %llu\n",
10491                                 (unsigned long long)rec->extent_item_refs,
10492                                 (unsigned long long)rec->refs);
10493                         ret = record_orphan_data_extents(root->fs_info, rec);
10494                         if (ret < 0)
10495                                 goto repair_abort;
10496                         fix = ret;
10497                         cur_err = 1;
10498                 }
10499                 if (all_backpointers_checked(rec, 1)) {
10500                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10501                                 (unsigned long long)rec->start,
10502                                 (unsigned long long)rec->nr);
10503                         fix = 1;
10504                         cur_err = 1;
10505                 }
10506                 if (!rec->owner_ref_checked) {
10507                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10508                                 (unsigned long long)rec->start,
10509                                 (unsigned long long)rec->nr);
10510                         fix = 1;
10511                         cur_err = 1;
10512                 }
10513
10514                 if (repair && fix) {
10515                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10516                         if (ret)
10517                                 goto repair_abort;
10518                 }
10519
10520
10521                 if (rec->bad_full_backref) {
10522                         fprintf(stderr, "bad full backref, on [%llu]\n",
10523                                 (unsigned long long)rec->start);
10524                         if (repair) {
10525                                 ret = fixup_extent_flags(root->fs_info, rec);
10526                                 if (ret)
10527                                         goto repair_abort;
10528                                 fix = 1;
10529                         }
10530                         cur_err = 1;
10531                 }
10532                 /*
10533                  * Although it's not a extent ref's problem, we reuse this
10534                  * routine for error reporting.
10535                  * No repair function yet.
10536                  */
10537                 if (rec->crossing_stripes) {
10538                         fprintf(stderr,
10539                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10540                                 rec->start, rec->start + rec->max_size);
10541                         cur_err = 1;
10542                 }
10543
10544                 if (rec->wrong_chunk_type) {
10545                         fprintf(stderr,
10546                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10547                                 rec->start, rec->start + rec->max_size);
10548                         cur_err = 1;
10549                 }
10550
10551                 remove_cache_extent(extent_cache, cache);
10552                 free_all_extent_backrefs(rec);
10553                 if (!init_extent_tree && repair && (!cur_err || fix))
10554                         clear_extent_dirty(root->fs_info->excluded_extents,
10555                                            rec->start,
10556                                            rec->start + rec->max_size - 1);
10557                 free(rec);
10558         }
10559 repair_abort:
10560         if (repair) {
10561                 if (ret && ret != -EAGAIN) {
10562                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10563                         exit(1);
10564                 } else if (!ret) {
10565                         struct btrfs_trans_handle *trans;
10566
10567                         root = root->fs_info->extent_root;
10568                         trans = btrfs_start_transaction(root, 1);
10569                         if (IS_ERR(trans)) {
10570                                 ret = PTR_ERR(trans);
10571                                 goto repair_abort;
10572                         }
10573
10574                         ret = btrfs_fix_block_accounting(trans, root);
10575                         if (ret)
10576                                 goto repair_abort;
10577                         ret = btrfs_commit_transaction(trans, root);
10578                         if (ret)
10579                                 goto repair_abort;
10580                 }
10581                 return ret;
10582         }
10583         return 0;
10584 }
10585
10586 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10587 {
10588         u64 stripe_size;
10589
10590         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10591                 stripe_size = length;
10592                 stripe_size /= num_stripes;
10593         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10594                 stripe_size = length * 2;
10595                 stripe_size /= num_stripes;
10596         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10597                 stripe_size = length;
10598                 stripe_size /= (num_stripes - 1);
10599         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10600                 stripe_size = length;
10601                 stripe_size /= (num_stripes - 2);
10602         } else {
10603                 stripe_size = length;
10604         }
10605         return stripe_size;
10606 }
10607
10608 /*
10609  * Check the chunk with its block group/dev list ref:
10610  * Return 0 if all refs seems valid.
10611  * Return 1 if part of refs seems valid, need later check for rebuild ref
10612  * like missing block group and needs to search extent tree to rebuild them.
10613  * Return -1 if essential refs are missing and unable to rebuild.
10614  */
10615 static int check_chunk_refs(struct chunk_record *chunk_rec,
10616                             struct block_group_tree *block_group_cache,
10617                             struct device_extent_tree *dev_extent_cache,
10618                             int silent)
10619 {
10620         struct cache_extent *block_group_item;
10621         struct block_group_record *block_group_rec;
10622         struct cache_extent *dev_extent_item;
10623         struct device_extent_record *dev_extent_rec;
10624         u64 devid;
10625         u64 offset;
10626         u64 length;
10627         int metadump_v2 = 0;
10628         int i;
10629         int ret = 0;
10630
10631         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10632                                                chunk_rec->offset,
10633                                                chunk_rec->length);
10634         if (block_group_item) {
10635                 block_group_rec = container_of(block_group_item,
10636                                                struct block_group_record,
10637                                                cache);
10638                 if (chunk_rec->length != block_group_rec->offset ||
10639                     chunk_rec->offset != block_group_rec->objectid ||
10640                     (!metadump_v2 &&
10641                      chunk_rec->type_flags != block_group_rec->flags)) {
10642                         if (!silent)
10643                                 fprintf(stderr,
10644                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10645                                         chunk_rec->objectid,
10646                                         chunk_rec->type,
10647                                         chunk_rec->offset,
10648                                         chunk_rec->length,
10649                                         chunk_rec->offset,
10650                                         chunk_rec->type_flags,
10651                                         block_group_rec->objectid,
10652                                         block_group_rec->type,
10653                                         block_group_rec->offset,
10654                                         block_group_rec->offset,
10655                                         block_group_rec->objectid,
10656                                         block_group_rec->flags);
10657                         ret = -1;
10658                 } else {
10659                         list_del_init(&block_group_rec->list);
10660                         chunk_rec->bg_rec = block_group_rec;
10661                 }
10662         } else {
10663                 if (!silent)
10664                         fprintf(stderr,
10665                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10666                                 chunk_rec->objectid,
10667                                 chunk_rec->type,
10668                                 chunk_rec->offset,
10669                                 chunk_rec->length,
10670                                 chunk_rec->offset,
10671                                 chunk_rec->type_flags);
10672                 ret = 1;
10673         }
10674
10675         if (metadump_v2)
10676                 return ret;
10677
10678         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10679                                     chunk_rec->num_stripes);
10680         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10681                 devid = chunk_rec->stripes[i].devid;
10682                 offset = chunk_rec->stripes[i].offset;
10683                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10684                                                        devid, offset, length);
10685                 if (dev_extent_item) {
10686                         dev_extent_rec = container_of(dev_extent_item,
10687                                                 struct device_extent_record,
10688                                                 cache);
10689                         if (dev_extent_rec->objectid != devid ||
10690                             dev_extent_rec->offset != offset ||
10691                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10692                             dev_extent_rec->length != length) {
10693                                 if (!silent)
10694                                         fprintf(stderr,
10695                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10696                                                 chunk_rec->objectid,
10697                                                 chunk_rec->type,
10698                                                 chunk_rec->offset,
10699                                                 chunk_rec->stripes[i].devid,
10700                                                 chunk_rec->stripes[i].offset,
10701                                                 dev_extent_rec->objectid,
10702                                                 dev_extent_rec->offset,
10703                                                 dev_extent_rec->length);
10704                                 ret = -1;
10705                         } else {
10706                                 list_move(&dev_extent_rec->chunk_list,
10707                                           &chunk_rec->dextents);
10708                         }
10709                 } else {
10710                         if (!silent)
10711                                 fprintf(stderr,
10712                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10713                                         chunk_rec->objectid,
10714                                         chunk_rec->type,
10715                                         chunk_rec->offset,
10716                                         chunk_rec->stripes[i].devid,
10717                                         chunk_rec->stripes[i].offset);
10718                         ret = -1;
10719                 }
10720         }
10721         return ret;
10722 }
10723
10724 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10725 int check_chunks(struct cache_tree *chunk_cache,
10726                  struct block_group_tree *block_group_cache,
10727                  struct device_extent_tree *dev_extent_cache,
10728                  struct list_head *good, struct list_head *bad,
10729                  struct list_head *rebuild, int silent)
10730 {
10731         struct cache_extent *chunk_item;
10732         struct chunk_record *chunk_rec;
10733         struct block_group_record *bg_rec;
10734         struct device_extent_record *dext_rec;
10735         int err;
10736         int ret = 0;
10737
10738         chunk_item = first_cache_extent(chunk_cache);
10739         while (chunk_item) {
10740                 chunk_rec = container_of(chunk_item, struct chunk_record,
10741                                          cache);
10742                 err = check_chunk_refs(chunk_rec, block_group_cache,
10743                                        dev_extent_cache, silent);
10744                 if (err < 0)
10745                         ret = err;
10746                 if (err == 0 && good)
10747                         list_add_tail(&chunk_rec->list, good);
10748                 if (err > 0 && rebuild)
10749                         list_add_tail(&chunk_rec->list, rebuild);
10750                 if (err < 0 && bad)
10751                         list_add_tail(&chunk_rec->list, bad);
10752                 chunk_item = next_cache_extent(chunk_item);
10753         }
10754
10755         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10756                 if (!silent)
10757                         fprintf(stderr,
10758                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10759                                 bg_rec->objectid,
10760                                 bg_rec->offset,
10761                                 bg_rec->flags);
10762                 if (!ret)
10763                         ret = 1;
10764         }
10765
10766         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10767                             chunk_list) {
10768                 if (!silent)
10769                         fprintf(stderr,
10770                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10771                                 dext_rec->objectid,
10772                                 dext_rec->offset,
10773                                 dext_rec->length);
10774                 if (!ret)
10775                         ret = 1;
10776         }
10777         return ret;
10778 }
10779
10780
10781 static int check_device_used(struct device_record *dev_rec,
10782                              struct device_extent_tree *dext_cache)
10783 {
10784         struct cache_extent *cache;
10785         struct device_extent_record *dev_extent_rec;
10786         u64 total_byte = 0;
10787
10788         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10789         while (cache) {
10790                 dev_extent_rec = container_of(cache,
10791                                               struct device_extent_record,
10792                                               cache);
10793                 if (dev_extent_rec->objectid != dev_rec->devid)
10794                         break;
10795
10796                 list_del_init(&dev_extent_rec->device_list);
10797                 total_byte += dev_extent_rec->length;
10798                 cache = next_cache_extent(cache);
10799         }
10800
10801         if (total_byte != dev_rec->byte_used) {
10802                 fprintf(stderr,
10803                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10804                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10805                         dev_rec->type, dev_rec->offset);
10806                 return -1;
10807         } else {
10808                 return 0;
10809         }
10810 }
10811
10812 /* check btrfs_dev_item -> btrfs_dev_extent */
10813 static int check_devices(struct rb_root *dev_cache,
10814                          struct device_extent_tree *dev_extent_cache)
10815 {
10816         struct rb_node *dev_node;
10817         struct device_record *dev_rec;
10818         struct device_extent_record *dext_rec;
10819         int err;
10820         int ret = 0;
10821
10822         dev_node = rb_first(dev_cache);
10823         while (dev_node) {
10824                 dev_rec = container_of(dev_node, struct device_record, node);
10825                 err = check_device_used(dev_rec, dev_extent_cache);
10826                 if (err)
10827                         ret = err;
10828
10829                 dev_node = rb_next(dev_node);
10830         }
10831         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10832                             device_list) {
10833                 fprintf(stderr,
10834                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10835                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10836                 if (!ret)
10837                         ret = 1;
10838         }
10839         return ret;
10840 }
10841
10842 static int add_root_item_to_list(struct list_head *head,
10843                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10844                                   u8 level, u8 drop_level,
10845                                   struct btrfs_key *drop_key)
10846 {
10847
10848         struct root_item_record *ri_rec;
10849         ri_rec = malloc(sizeof(*ri_rec));
10850         if (!ri_rec)
10851                 return -ENOMEM;
10852         ri_rec->bytenr = bytenr;
10853         ri_rec->objectid = objectid;
10854         ri_rec->level = level;
10855         ri_rec->drop_level = drop_level;
10856         ri_rec->last_snapshot = last_snapshot;
10857         if (drop_key)
10858                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10859         list_add_tail(&ri_rec->list, head);
10860
10861         return 0;
10862 }
10863
10864 static void free_root_item_list(struct list_head *list)
10865 {
10866         struct root_item_record *ri_rec;
10867
10868         while (!list_empty(list)) {
10869                 ri_rec = list_first_entry(list, struct root_item_record,
10870                                           list);
10871                 list_del_init(&ri_rec->list);
10872                 free(ri_rec);
10873         }
10874 }
10875
10876 static int deal_root_from_list(struct list_head *list,
10877                                struct btrfs_root *root,
10878                                struct block_info *bits,
10879                                int bits_nr,
10880                                struct cache_tree *pending,
10881                                struct cache_tree *seen,
10882                                struct cache_tree *reada,
10883                                struct cache_tree *nodes,
10884                                struct cache_tree *extent_cache,
10885                                struct cache_tree *chunk_cache,
10886                                struct rb_root *dev_cache,
10887                                struct block_group_tree *block_group_cache,
10888                                struct device_extent_tree *dev_extent_cache)
10889 {
10890         int ret = 0;
10891         u64 last;
10892
10893         while (!list_empty(list)) {
10894                 struct root_item_record *rec;
10895                 struct extent_buffer *buf;
10896                 rec = list_entry(list->next,
10897                                  struct root_item_record, list);
10898                 last = 0;
10899                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10900                 if (!extent_buffer_uptodate(buf)) {
10901                         free_extent_buffer(buf);
10902                         ret = -EIO;
10903                         break;
10904                 }
10905                 ret = add_root_to_pending(buf, extent_cache, pending,
10906                                     seen, nodes, rec->objectid);
10907                 if (ret < 0)
10908                         break;
10909                 /*
10910                  * To rebuild extent tree, we need deal with snapshot
10911                  * one by one, otherwise we deal with node firstly which
10912                  * can maximize readahead.
10913                  */
10914                 while (1) {
10915                         ret = run_next_block(root, bits, bits_nr, &last,
10916                                              pending, seen, reada, nodes,
10917                                              extent_cache, chunk_cache,
10918                                              dev_cache, block_group_cache,
10919                                              dev_extent_cache, rec);
10920                         if (ret != 0)
10921                                 break;
10922                 }
10923                 free_extent_buffer(buf);
10924                 list_del(&rec->list);
10925                 free(rec);
10926                 if (ret < 0)
10927                         break;
10928         }
10929         while (ret >= 0) {
10930                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10931                                      reada, nodes, extent_cache, chunk_cache,
10932                                      dev_cache, block_group_cache,
10933                                      dev_extent_cache, NULL);
10934                 if (ret != 0) {
10935                         if (ret > 0)
10936                                 ret = 0;
10937                         break;
10938                 }
10939         }
10940         return ret;
10941 }
10942
10943 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10944 {
10945         struct rb_root dev_cache;
10946         struct cache_tree chunk_cache;
10947         struct block_group_tree block_group_cache;
10948         struct device_extent_tree dev_extent_cache;
10949         struct cache_tree extent_cache;
10950         struct cache_tree seen;
10951         struct cache_tree pending;
10952         struct cache_tree reada;
10953         struct cache_tree nodes;
10954         struct extent_io_tree excluded_extents;
10955         struct cache_tree corrupt_blocks;
10956         struct btrfs_path path;
10957         struct btrfs_key key;
10958         struct btrfs_key found_key;
10959         int ret, err = 0;
10960         struct block_info *bits;
10961         int bits_nr;
10962         struct extent_buffer *leaf;
10963         int slot;
10964         struct btrfs_root_item ri;
10965         struct list_head dropping_trees;
10966         struct list_head normal_trees;
10967         struct btrfs_root *root1;
10968         struct btrfs_root *root;
10969         u64 objectid;
10970         u8 level;
10971
10972         root = fs_info->fs_root;
10973         dev_cache = RB_ROOT;
10974         cache_tree_init(&chunk_cache);
10975         block_group_tree_init(&block_group_cache);
10976         device_extent_tree_init(&dev_extent_cache);
10977
10978         cache_tree_init(&extent_cache);
10979         cache_tree_init(&seen);
10980         cache_tree_init(&pending);
10981         cache_tree_init(&nodes);
10982         cache_tree_init(&reada);
10983         cache_tree_init(&corrupt_blocks);
10984         extent_io_tree_init(&excluded_extents);
10985         INIT_LIST_HEAD(&dropping_trees);
10986         INIT_LIST_HEAD(&normal_trees);
10987
10988         if (repair) {
10989                 fs_info->excluded_extents = &excluded_extents;
10990                 fs_info->fsck_extent_cache = &extent_cache;
10991                 fs_info->free_extent_hook = free_extent_hook;
10992                 fs_info->corrupt_blocks = &corrupt_blocks;
10993         }
10994
10995         bits_nr = 1024;
10996         bits = malloc(bits_nr * sizeof(struct block_info));
10997         if (!bits) {
10998                 perror("malloc");
10999                 exit(1);
11000         }
11001
11002         if (ctx.progress_enabled) {
11003                 ctx.tp = TASK_EXTENTS;
11004                 task_start(ctx.info);
11005         }
11006
11007 again:
11008         root1 = fs_info->tree_root;
11009         level = btrfs_header_level(root1->node);
11010         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11011                                     root1->node->start, 0, level, 0, NULL);
11012         if (ret < 0)
11013                 goto out;
11014         root1 = fs_info->chunk_root;
11015         level = btrfs_header_level(root1->node);
11016         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11017                                     root1->node->start, 0, level, 0, NULL);
11018         if (ret < 0)
11019                 goto out;
11020         btrfs_init_path(&path);
11021         key.offset = 0;
11022         key.objectid = 0;
11023         key.type = BTRFS_ROOT_ITEM_KEY;
11024         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11025         if (ret < 0)
11026                 goto out;
11027         while(1) {
11028                 leaf = path.nodes[0];
11029                 slot = path.slots[0];
11030                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11031                         ret = btrfs_next_leaf(root, &path);
11032                         if (ret != 0)
11033                                 break;
11034                         leaf = path.nodes[0];
11035                         slot = path.slots[0];
11036                 }
11037                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11038                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11039                         unsigned long offset;
11040                         u64 last_snapshot;
11041
11042                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11043                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11044                         last_snapshot = btrfs_root_last_snapshot(&ri);
11045                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11046                                 level = btrfs_root_level(&ri);
11047                                 ret = add_root_item_to_list(&normal_trees,
11048                                                 found_key.objectid,
11049                                                 btrfs_root_bytenr(&ri),
11050                                                 last_snapshot, level,
11051                                                 0, NULL);
11052                                 if (ret < 0)
11053                                         goto out;
11054                         } else {
11055                                 level = btrfs_root_level(&ri);
11056                                 objectid = found_key.objectid;
11057                                 btrfs_disk_key_to_cpu(&found_key,
11058                                                       &ri.drop_progress);
11059                                 ret = add_root_item_to_list(&dropping_trees,
11060                                                 objectid,
11061                                                 btrfs_root_bytenr(&ri),
11062                                                 last_snapshot, level,
11063                                                 ri.drop_level, &found_key);
11064                                 if (ret < 0)
11065                                         goto out;
11066                         }
11067                 }
11068                 path.slots[0]++;
11069         }
11070         btrfs_release_path(&path);
11071
11072         /*
11073          * check_block can return -EAGAIN if it fixes something, please keep
11074          * this in mind when dealing with return values from these functions, if
11075          * we get -EAGAIN we want to fall through and restart the loop.
11076          */
11077         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11078                                   &seen, &reada, &nodes, &extent_cache,
11079                                   &chunk_cache, &dev_cache, &block_group_cache,
11080                                   &dev_extent_cache);
11081         if (ret < 0) {
11082                 if (ret == -EAGAIN)
11083                         goto loop;
11084                 goto out;
11085         }
11086         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11087                                   &pending, &seen, &reada, &nodes,
11088                                   &extent_cache, &chunk_cache, &dev_cache,
11089                                   &block_group_cache, &dev_extent_cache);
11090         if (ret < 0) {
11091                 if (ret == -EAGAIN)
11092                         goto loop;
11093                 goto out;
11094         }
11095
11096         ret = check_chunks(&chunk_cache, &block_group_cache,
11097                            &dev_extent_cache, NULL, NULL, NULL, 0);
11098         if (ret) {
11099                 if (ret == -EAGAIN)
11100                         goto loop;
11101                 err = ret;
11102         }
11103
11104         ret = check_extent_refs(root, &extent_cache);
11105         if (ret < 0) {
11106                 if (ret == -EAGAIN)
11107                         goto loop;
11108                 goto out;
11109         }
11110
11111         ret = check_devices(&dev_cache, &dev_extent_cache);
11112         if (ret && err)
11113                 ret = err;
11114
11115 out:
11116         task_stop(ctx.info);
11117         if (repair) {
11118                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11119                 extent_io_tree_cleanup(&excluded_extents);
11120                 fs_info->fsck_extent_cache = NULL;
11121                 fs_info->free_extent_hook = NULL;
11122                 fs_info->corrupt_blocks = NULL;
11123                 fs_info->excluded_extents = NULL;
11124         }
11125         free(bits);
11126         free_chunk_cache_tree(&chunk_cache);
11127         free_device_cache_tree(&dev_cache);
11128         free_block_group_tree(&block_group_cache);
11129         free_device_extent_tree(&dev_extent_cache);
11130         free_extent_cache_tree(&seen);
11131         free_extent_cache_tree(&pending);
11132         free_extent_cache_tree(&reada);
11133         free_extent_cache_tree(&nodes);
11134         free_root_item_list(&normal_trees);
11135         free_root_item_list(&dropping_trees);
11136         return ret;
11137 loop:
11138         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11139         free_extent_cache_tree(&seen);
11140         free_extent_cache_tree(&pending);
11141         free_extent_cache_tree(&reada);
11142         free_extent_cache_tree(&nodes);
11143         free_chunk_cache_tree(&chunk_cache);
11144         free_block_group_tree(&block_group_cache);
11145         free_device_cache_tree(&dev_cache);
11146         free_device_extent_tree(&dev_extent_cache);
11147         free_extent_record_cache(&extent_cache);
11148         free_root_item_list(&normal_trees);
11149         free_root_item_list(&dropping_trees);
11150         extent_io_tree_cleanup(&excluded_extents);
11151         goto again;
11152 }
11153
/*
 * Check backrefs of a tree block given by @bytenr or @eb.
 *
 * @root:       the root containing the @bytenr or @eb
 * @eb:         tree block extent buffer, can be NULL
 * @bytenr:     bytenr of the tree block to search
 * @level:      tree level of the tree block
 * @owner:      owner of the tree block
 *
 * When @eb is NULL only the existence of a backref is checked: the
 * flag/generation/level/refs comparisons are skipped and the "backref lost"
 * message is not printed.  The function calls itself with @eb == NULL to
 * validate the parent referenced by a shared block ref.
 *
 * Return >0 for any error found (a combination of BACKREF_MISSING and
 * BACKREF_MISMATCH) and output error message
 * Return 0 for no error found
 */
static int check_tree_block_ref(struct btrfs_root *root,
                                struct extent_buffer *eb, u64 bytenr,
                                int level, u64 owner)
{
        struct btrfs_key key;
        struct btrfs_root *extent_root = root->fs_info->extent_root;
        struct btrfs_path path;
        struct btrfs_extent_item *ei;
        struct btrfs_extent_inline_ref *iref;
        struct extent_buffer *leaf;
        unsigned long end;
        unsigned long ptr;
        int slot;
        int skinny_level;
        int type;
        u32 nodesize = root->fs_info->nodesize;
        u32 item_size;
        u64 offset;
        int tree_reloc_root = 0;
        int found_ref = 0;
        int err = 0;
        int ret;

        /* Is @bytenr the root node of a tree reloc root? */
        if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
            btrfs_header_bytenr(root->node) == bytenr)
                tree_reloc_root = 1;

        /*
         * Search with the largest possible offset for @bytenr; the extent
         * item itself is then located by btrfs_previous_extent_item().
         */
        btrfs_init_path(&path);
        key.objectid = bytenr;
        if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
                key.type = BTRFS_METADATA_ITEM_KEY;
        else
                key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = (u64)-1;

        /* Search for the backref in extent tree */
        ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
        if (ret < 0) {
                err |= BACKREF_MISSING;
                goto out;
        }
        ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
        if (ret) {
                err |= BACKREF_MISSING;
                goto out;
        }

        leaf = path.nodes[0];
        slot = path.slots[0];
        btrfs_item_key_to_cpu(leaf, &key, slot);

        ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);

        /*
         * For a METADATA_ITEM the level is taken from the key offset and the
         * inline refs start right after the extent item; otherwise a
         * btrfs_tree_block_info carrying the level sits in between.
         */
        if (key.type == BTRFS_METADATA_ITEM_KEY) {
                skinny_level = (int)key.offset;
                iref = (struct btrfs_extent_inline_ref *)(ei + 1);
        } else {
                struct btrfs_tree_block_info *info;

                info = (struct btrfs_tree_block_info *)(ei + 1);
                skinny_level = btrfs_tree_block_level(leaf, info);
                iref = (struct btrfs_extent_inline_ref *)(info + 1);
        }

        /* Compare the extent item against the tree block, if we have it */
        if (eb) {
                u64 header_gen;
                u64 extent_gen;

                if (!(btrfs_extent_flags(leaf, ei) &
                      BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
                        error(
                "extent[%llu %u] backref type mismatch, missing bit: %llx",
                                key.objectid, nodesize,
                                BTRFS_EXTENT_FLAG_TREE_BLOCK);
                        err = BACKREF_MISMATCH;
                }
                header_gen = btrfs_header_generation(eb);
                extent_gen = btrfs_extent_generation(leaf, ei);
                if (header_gen != extent_gen) {
                        error(
        "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
                                key.objectid, nodesize, header_gen,
                                extent_gen);
                        err = BACKREF_MISMATCH;
                }
                if (level != skinny_level) {
                        error(
                        "extent[%llu %u] level mismatch, wanted: %u, have: %u",
                                key.objectid, nodesize, level, skinny_level);
                        err = BACKREF_MISMATCH;
                }
                /* A block not owned by an fs tree must have exactly one ref */
                if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
                        error(
                        "extent[%llu %u] is referred by other roots than %llu",
                                key.objectid, nodesize, root->objectid);
                        err = BACKREF_MISMATCH;
                }
        }

        /*
         * Iterate the extent/metadata item to find the exact backref
         */
        item_size = btrfs_item_size_nr(leaf, slot);
        ptr = (unsigned long)iref;
        end = (unsigned long)ei + item_size;
        while (ptr < end) {
                iref = (struct btrfs_extent_inline_ref *)ptr;
                type = btrfs_extent_inline_ref_type(leaf, iref);
                offset = btrfs_extent_inline_ref_offset(leaf, iref);

                /* Keyed ref: the ref offset must name this root or @owner */
                if (type == BTRFS_TREE_BLOCK_REF_KEY &&
                        (offset == root->objectid || offset == owner)) {
                        found_ref = 1;
                } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
                        /*
                         * Backref of tree reloc root points to itself, no need
                         * to check backref any more.
                         */
                        if (tree_reloc_root)
                                found_ref = 1;
                        else
                        /* Check if the backref points to valid referencer */
                                found_ref = !check_tree_block_ref(root, NULL,
                                                offset, level + 1, owner);
                }

                if (found_ref)
                        break;
                /* Advance by the size of this inline ref type */
                ptr += btrfs_extent_inline_ref_size(type);
        }

        /*
         * Inlined extent item doesn't have what we need, check
         * TREE_BLOCK_REF_KEY
         */
        if (!found_ref) {
                btrfs_release_path(&path);
                key.objectid = bytenr;
                key.type = BTRFS_TREE_BLOCK_REF_KEY;
                key.offset = root->objectid;

                /* An exact match (ret == 0) means a keyed backref exists */
                ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
                if (!ret)
                        found_ref = 1;
        }
        if (!found_ref)
                err |= BACKREF_MISSING;
out:
        btrfs_release_path(&path);
        /* Stay silent when called without @eb (the recursive probe above) */
        if (eb && (err & BACKREF_MISSING))
                error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
                        bytenr, nodesize, owner, level);
        return err;
}
11320
11321 /*
11322  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11323  *
11324  * Return >0 any error found and output error message
11325  * Return 0 for no error found
11326  */
11327 static int check_extent_data_item(struct btrfs_root *root,
11328                                   struct extent_buffer *eb, int slot)
11329 {
11330         struct btrfs_file_extent_item *fi;
11331         struct btrfs_path path;
11332         struct btrfs_root *extent_root = root->fs_info->extent_root;
11333         struct btrfs_key fi_key;
11334         struct btrfs_key dbref_key;
11335         struct extent_buffer *leaf;
11336         struct btrfs_extent_item *ei;
11337         struct btrfs_extent_inline_ref *iref;
11338         struct btrfs_extent_data_ref *dref;
11339         u64 owner;
11340         u64 disk_bytenr;
11341         u64 disk_num_bytes;
11342         u64 extent_num_bytes;
11343         u64 extent_flags;
11344         u32 item_size;
11345         unsigned long end;
11346         unsigned long ptr;
11347         int type;
11348         u64 ref_root;
11349         int found_dbackref = 0;
11350         int err = 0;
11351         int ret;
11352
11353         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11354         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11355
11356         /* Nothing to check for hole and inline data extents */
11357         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11358             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11359                 return 0;
11360
11361         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11362         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11363         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11364
11365         /* Check unaligned disk_num_bytes and num_bytes */
11366         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11367                 error(
11368 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11369                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11370                         root->fs_info->sectorsize);
11371                 err |= BYTES_UNALIGNED;
11372         } else {
11373                 data_bytes_allocated += disk_num_bytes;
11374         }
11375         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11376                 error(
11377 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11378                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11379                         root->fs_info->sectorsize);
11380                 err |= BYTES_UNALIGNED;
11381         } else {
11382                 data_bytes_referenced += extent_num_bytes;
11383         }
11384         owner = btrfs_header_owner(eb);
11385
11386         /* Check the extent item of the file extent in extent tree */
11387         btrfs_init_path(&path);
11388         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11389         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11390         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11391
11392         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11393         if (ret)
11394                 goto out;
11395
11396         leaf = path.nodes[0];
11397         slot = path.slots[0];
11398         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11399
11400         extent_flags = btrfs_extent_flags(leaf, ei);
11401
11402         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11403                 error(
11404                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11405                     disk_bytenr, disk_num_bytes,
11406                     BTRFS_EXTENT_FLAG_DATA);
11407                 err |= BACKREF_MISMATCH;
11408         }
11409
11410         /* Check data backref inside that extent item */
11411         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11412         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11413         ptr = (unsigned long)iref;
11414         end = (unsigned long)ei + item_size;
11415         while (ptr < end) {
11416                 iref = (struct btrfs_extent_inline_ref *)ptr;
11417                 type = btrfs_extent_inline_ref_type(leaf, iref);
11418                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11419
11420                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11421                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11422                         if (ref_root == owner || ref_root == root->objectid)
11423                                 found_dbackref = 1;
11424                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11425                         found_dbackref = !check_tree_block_ref(root, NULL,
11426                                 btrfs_extent_inline_ref_offset(leaf, iref),
11427                                 0, owner);
11428                 }
11429
11430                 if (found_dbackref)
11431                         break;
11432                 ptr += btrfs_extent_inline_ref_size(type);
11433         }
11434
11435         if (!found_dbackref) {
11436                 btrfs_release_path(&path);
11437
11438                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11439                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11440                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11441                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11442                                 fi_key.objectid, fi_key.offset);
11443
11444                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11445                                         &dbref_key, &path, 0, 0);
11446                 if (!ret) {
11447                         found_dbackref = 1;
11448                         goto out;
11449                 }
11450
11451                 btrfs_release_path(&path);
11452
11453                 /*
11454                  * Neither inlined nor EXTENT_DATA_REF found, try
11455                  * SHARED_DATA_REF as last chance.
11456                  */
11457                 dbref_key.objectid = disk_bytenr;
11458                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11459                 dbref_key.offset = eb->start;
11460
11461                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11462                                         &dbref_key, &path, 0, 0);
11463                 if (!ret) {
11464                         found_dbackref = 1;
11465                         goto out;
11466                 }
11467         }
11468
11469 out:
11470         if (!found_dbackref)
11471                 err |= BACKREF_MISSING;
11472         btrfs_release_path(&path);
11473         if (err & BACKREF_MISSING) {
11474                 error("data extent[%llu %llu] backref lost",
11475                       disk_bytenr, disk_num_bytes);
11476         }
11477         return err;
11478 }
11479
11480 /*
11481  * Get real tree block level for the case like shared block
11482  * Return >= 0 as tree level
11483  * Return <0 for error
11484  */
11485 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11486 {
11487         struct extent_buffer *eb;
11488         struct btrfs_path path;
11489         struct btrfs_key key;
11490         struct btrfs_extent_item *ei;
11491         u64 flags;
11492         u64 transid;
11493         u8 backref_level;
11494         u8 header_level;
11495         int ret;
11496
11497         /* Search extent tree for extent generation and level */
11498         key.objectid = bytenr;
11499         key.type = BTRFS_METADATA_ITEM_KEY;
11500         key.offset = (u64)-1;
11501
11502         btrfs_init_path(&path);
11503         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11504         if (ret < 0)
11505                 goto release_out;
11506         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11507         if (ret < 0)
11508                 goto release_out;
11509         if (ret > 0) {
11510                 ret = -ENOENT;
11511                 goto release_out;
11512         }
11513
11514         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11515         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11516                             struct btrfs_extent_item);
11517         flags = btrfs_extent_flags(path.nodes[0], ei);
11518         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11519                 ret = -ENOENT;
11520                 goto release_out;
11521         }
11522
11523         /* Get transid for later read_tree_block() check */
11524         transid = btrfs_extent_generation(path.nodes[0], ei);
11525
11526         /* Get backref level as one source */
11527         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11528                 backref_level = key.offset;
11529         } else {
11530                 struct btrfs_tree_block_info *info;
11531
11532                 info = (struct btrfs_tree_block_info *)(ei + 1);
11533                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11534         }
11535         btrfs_release_path(&path);
11536
11537         /* Get level from tree block as an alternative source */
11538         eb = read_tree_block(fs_info, bytenr, transid);
11539         if (!extent_buffer_uptodate(eb)) {
11540                 free_extent_buffer(eb);
11541                 return -EIO;
11542         }
11543         header_level = btrfs_header_level(eb);
11544         free_extent_buffer(eb);
11545
11546         if (header_level != backref_level)
11547                 return -EIO;
11548         return header_level;
11549
11550 release_out:
11551         btrfs_release_path(&path);
11552         return ret;
11553 }
11554
11555 /*
11556  * Check if a tree block backref is valid (points to a valid tree block)
11557  * if level == -1, level will be resolved
11558  * Return >0 for any error found and print error message
11559  */
11560 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11561                                     u64 bytenr, int level)
11562 {
11563         struct btrfs_root *root;
11564         struct btrfs_key key;
11565         struct btrfs_path path;
11566         struct extent_buffer *eb;
11567         struct extent_buffer *node;
11568         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11569         int err = 0;
11570         int ret;
11571
11572         /* Query level for level == -1 special case */
11573         if (level == -1)
11574                 level = query_tree_block_level(fs_info, bytenr);
11575         if (level < 0) {
11576                 err |= REFERENCER_MISSING;
11577                 goto out;
11578         }
11579
11580         key.objectid = root_id;
11581         key.type = BTRFS_ROOT_ITEM_KEY;
11582         key.offset = (u64)-1;
11583
11584         root = btrfs_read_fs_root(fs_info, &key);
11585         if (IS_ERR(root)) {
11586                 err |= REFERENCER_MISSING;
11587                 goto out;
11588         }
11589
11590         /* Read out the tree block to get item/node key */
11591         eb = read_tree_block(fs_info, bytenr, 0);
11592         if (!extent_buffer_uptodate(eb)) {
11593                 err |= REFERENCER_MISSING;
11594                 free_extent_buffer(eb);
11595                 goto out;
11596         }
11597
11598         /* Empty tree, no need to check key */
11599         if (!btrfs_header_nritems(eb) && !level) {
11600                 free_extent_buffer(eb);
11601                 goto out;
11602         }
11603
11604         if (level)
11605                 btrfs_node_key_to_cpu(eb, &key, 0);
11606         else
11607                 btrfs_item_key_to_cpu(eb, &key, 0);
11608
11609         free_extent_buffer(eb);
11610
11611         btrfs_init_path(&path);
11612         path.lowest_level = level;
11613         /* Search with the first key, to ensure we can reach it */
11614         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11615         if (ret < 0) {
11616                 err |= REFERENCER_MISSING;
11617                 goto release_out;
11618         }
11619
11620         node = path.nodes[level];
11621         if (btrfs_header_bytenr(node) != bytenr) {
11622                 error(
11623         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11624                         bytenr, nodesize, bytenr,
11625                         btrfs_header_bytenr(node));
11626                 err |= REFERENCER_MISMATCH;
11627         }
11628         if (btrfs_header_level(node) != level) {
11629                 error(
11630         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11631                         bytenr, nodesize, level,
11632                         btrfs_header_level(node));
11633                 err |= REFERENCER_MISMATCH;
11634         }
11635
11636 release_out:
11637         btrfs_release_path(&path);
11638 out:
11639         if (err & REFERENCER_MISSING) {
11640                 if (level < 0)
11641                         error("extent [%llu %d] lost referencer (owner: %llu)",
11642                                 bytenr, nodesize, root_id);
11643                 else
11644                         error(
11645                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11646                                 bytenr, nodesize, root_id, level);
11647         }
11648
11649         return err;
11650 }
11651
11652 /*
11653  * Check if tree block @eb is tree reloc root.
11654  * Return 0 if it's not or any problem happens
11655  * Return 1 if it's a tree reloc root
11656  */
11657 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11658                                  struct extent_buffer *eb)
11659 {
11660         struct btrfs_root *tree_reloc_root;
11661         struct btrfs_key key;
11662         u64 bytenr = btrfs_header_bytenr(eb);
11663         u64 owner = btrfs_header_owner(eb);
11664         int ret = 0;
11665
11666         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11667         key.offset = owner;
11668         key.type = BTRFS_ROOT_ITEM_KEY;
11669
11670         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11671         if (IS_ERR(tree_reloc_root))
11672                 return 0;
11673
11674         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11675                 ret = 1;
11676         btrfs_free_fs_root(tree_reloc_root);
11677         return ret;
11678 }
11679
11680 /*
11681  * Check referencer for shared block backref
11682  * If level == -1, this function will resolve the level.
11683  */
11684 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11685                                      u64 parent, u64 bytenr, int level)
11686 {
11687         struct extent_buffer *eb;
11688         u32 nr;
11689         int found_parent = 0;
11690         int i;
11691
11692         eb = read_tree_block(fs_info, parent, 0);
11693         if (!extent_buffer_uptodate(eb))
11694                 goto out;
11695
11696         if (level == -1)
11697                 level = query_tree_block_level(fs_info, bytenr);
11698         if (level < 0)
11699                 goto out;
11700
11701         /* It's possible it's a tree reloc root */
11702         if (parent == bytenr) {
11703                 if (is_tree_reloc_root(fs_info, eb))
11704                         found_parent = 1;
11705                 goto out;
11706         }
11707
11708         if (level + 1 != btrfs_header_level(eb))
11709                 goto out;
11710
11711         nr = btrfs_header_nritems(eb);
11712         for (i = 0; i < nr; i++) {
11713                 if (bytenr == btrfs_node_blockptr(eb, i)) {
11714                         found_parent = 1;
11715                         break;
11716                 }
11717         }
11718 out:
11719         free_extent_buffer(eb);
11720         if (!found_parent) {
11721                 error(
11722         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11723                         bytenr, fs_info->nodesize, parent, level);
11724                 return REFERENCER_MISSING;
11725         }
11726         return 0;
11727 }
11728
11729 /*
11730  * Check referencer for normal (inlined) data ref
11731  * If len == 0, it will be resolved by searching in extent tree
11732  */
11733 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11734                                      u64 root_id, u64 objectid, u64 offset,
11735                                      u64 bytenr, u64 len, u32 count)
11736 {
11737         struct btrfs_root *root;
11738         struct btrfs_root *extent_root = fs_info->extent_root;
11739         struct btrfs_key key;
11740         struct btrfs_path path;
11741         struct extent_buffer *leaf;
11742         struct btrfs_file_extent_item *fi;
11743         u32 found_count = 0;
11744         int slot;
11745         int ret = 0;
11746
11747         if (!len) {
11748                 key.objectid = bytenr;
11749                 key.type = BTRFS_EXTENT_ITEM_KEY;
11750                 key.offset = (u64)-1;
11751
11752                 btrfs_init_path(&path);
11753                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11754                 if (ret < 0)
11755                         goto out;
11756                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11757                 if (ret)
11758                         goto out;
11759                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11760                 if (key.objectid != bytenr ||
11761                     key.type != BTRFS_EXTENT_ITEM_KEY)
11762                         goto out;
11763                 len = key.offset;
11764                 btrfs_release_path(&path);
11765         }
11766         key.objectid = root_id;
11767         key.type = BTRFS_ROOT_ITEM_KEY;
11768         key.offset = (u64)-1;
11769         btrfs_init_path(&path);
11770
11771         root = btrfs_read_fs_root(fs_info, &key);
11772         if (IS_ERR(root))
11773                 goto out;
11774
11775         key.objectid = objectid;
11776         key.type = BTRFS_EXTENT_DATA_KEY;
11777         /*
11778          * It can be nasty as data backref offset is
11779          * file offset - file extent offset, which is smaller or
11780          * equal to original backref offset.  The only special case is
11781          * overflow.  So we need to special check and do further search.
11782          */
11783         key.offset = offset & (1ULL << 63) ? 0 : offset;
11784
11785         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11786         if (ret < 0)
11787                 goto out;
11788
11789         /*
11790          * Search afterwards to get correct one
11791          * NOTE: As we must do a comprehensive check on the data backref to
11792          * make sure the dref count also matches, we must iterate all file
11793          * extents for that inode.
11794          */
11795         while (1) {
11796                 leaf = path.nodes[0];
11797                 slot = path.slots[0];
11798
11799                 if (slot >= btrfs_header_nritems(leaf))
11800                         goto next;
11801                 btrfs_item_key_to_cpu(leaf, &key, slot);
11802                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11803                         break;
11804                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11805                 /*
11806                  * Except normal disk bytenr and disk num bytes, we still
11807                  * need to do extra check on dbackref offset as
11808                  * dbackref offset = file_offset - file_extent_offset
11809                  */
11810                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11811                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11812                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11813                     offset)
11814                         found_count++;
11815
11816 next:
11817                 ret = btrfs_next_item(root, &path);
11818                 if (ret)
11819                         break;
11820         }
11821 out:
11822         btrfs_release_path(&path);
11823         if (found_count != count) {
11824                 error(
11825 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11826                         bytenr, len, root_id, objectid, offset, count, found_count);
11827                 return REFERENCER_MISSING;
11828         }
11829         return 0;
11830 }
11831
11832 /*
11833  * Check if the referencer of a shared data backref exists
11834  */
11835 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11836                                      u64 parent, u64 bytenr)
11837 {
11838         struct extent_buffer *eb;
11839         struct btrfs_key key;
11840         struct btrfs_file_extent_item *fi;
11841         u32 nr;
11842         int found_parent = 0;
11843         int i;
11844
11845         eb = read_tree_block(fs_info, parent, 0);
11846         if (!extent_buffer_uptodate(eb))
11847                 goto out;
11848
11849         nr = btrfs_header_nritems(eb);
11850         for (i = 0; i < nr; i++) {
11851                 btrfs_item_key_to_cpu(eb, &key, i);
11852                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11853                         continue;
11854
11855                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11856                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11857                         continue;
11858
11859                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11860                         found_parent = 1;
11861                         break;
11862                 }
11863         }
11864
11865 out:
11866         free_extent_buffer(eb);
11867         if (!found_parent) {
11868                 error("shared extent %llu referencer lost (parent: %llu)",
11869                         bytenr, parent);
11870                 return REFERENCER_MISSING;
11871         }
11872         return 0;
11873 }
11874
11875 /*
11876  * This function will check a given extent item, including its backref and
11877  * itself (like crossing stripe boundary and type)
11878  *
11879  * Since we don't use extent_record anymore, introduce new error bit
11880  */
11881 static int check_extent_item(struct btrfs_fs_info *fs_info,
11882                              struct extent_buffer *eb, int slot)
11883 {
11884         struct btrfs_extent_item *ei;
11885         struct btrfs_extent_inline_ref *iref;
11886         struct btrfs_extent_data_ref *dref;
11887         unsigned long end;
11888         unsigned long ptr;
11889         int type;
11890         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11891         u32 item_size = btrfs_item_size_nr(eb, slot);
11892         u64 flags;
11893         u64 offset;
11894         int metadata = 0;
11895         int level;
11896         struct btrfs_key key;
11897         int ret;
11898         int err = 0;
11899
11900         btrfs_item_key_to_cpu(eb, &key, slot);
11901         if (key.type == BTRFS_EXTENT_ITEM_KEY)
11902                 bytes_used += key.offset;
11903         else
11904                 bytes_used += nodesize;
11905
11906         if (item_size < sizeof(*ei)) {
11907                 /*
11908                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11909                  * old thing when on disk format is still un-determined.
11910                  * No need to care about it anymore
11911                  */
11912                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11913                 return -ENOTTY;
11914         }
11915
11916         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11917         flags = btrfs_extent_flags(eb, ei);
11918
11919         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11920                 metadata = 1;
11921         if (metadata && check_crossing_stripes(global_info, key.objectid,
11922                                                eb->len)) {
11923                 error("bad metadata [%llu, %llu) crossing stripe boundary",
11924                       key.objectid, key.objectid + nodesize);
11925                 err |= CROSSING_STRIPE_BOUNDARY;
11926         }
11927
11928         ptr = (unsigned long)(ei + 1);
11929
11930         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11931                 /* Old EXTENT_ITEM metadata */
11932                 struct btrfs_tree_block_info *info;
11933
11934                 info = (struct btrfs_tree_block_info *)ptr;
11935                 level = btrfs_tree_block_level(eb, info);
11936                 ptr += sizeof(struct btrfs_tree_block_info);
11937         } else {
11938                 /* New METADATA_ITEM */
11939                 level = key.offset;
11940         }
11941         end = (unsigned long)ei + item_size;
11942
11943 next:
11944         /* Reached extent item end normally */
11945         if (ptr == end)
11946                 goto out;
11947
11948         /* Beyond extent item end, wrong item size */
11949         if (ptr > end) {
11950                 err |= ITEM_SIZE_MISMATCH;
11951                 error("extent item at bytenr %llu slot %d has wrong size",
11952                         eb->start, slot);
11953                 goto out;
11954         }
11955
11956         /* Now check every backref in this extent item */
11957         iref = (struct btrfs_extent_inline_ref *)ptr;
11958         type = btrfs_extent_inline_ref_type(eb, iref);
11959         offset = btrfs_extent_inline_ref_offset(eb, iref);
11960         switch (type) {
11961         case BTRFS_TREE_BLOCK_REF_KEY:
11962                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11963                                                level);
11964                 err |= ret;
11965                 break;
11966         case BTRFS_SHARED_BLOCK_REF_KEY:
11967                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11968                                                  level);
11969                 err |= ret;
11970                 break;
11971         case BTRFS_EXTENT_DATA_REF_KEY:
11972                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11973                 ret = check_extent_data_backref(fs_info,
11974                                 btrfs_extent_data_ref_root(eb, dref),
11975                                 btrfs_extent_data_ref_objectid(eb, dref),
11976                                 btrfs_extent_data_ref_offset(eb, dref),
11977                                 key.objectid, key.offset,
11978                                 btrfs_extent_data_ref_count(eb, dref));
11979                 err |= ret;
11980                 break;
11981         case BTRFS_SHARED_DATA_REF_KEY:
11982                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11983                 err |= ret;
11984                 break;
11985         default:
11986                 error("extent[%llu %d %llu] has unknown ref type: %d",
11987                         key.objectid, key.type, key.offset, type);
11988                 err |= UNKNOWN_TYPE;
11989                 goto out;
11990         }
11991
11992         ptr += btrfs_extent_inline_ref_size(type);
11993         goto next;
11994
11995 out:
11996         return err;
11997 }
11998
11999 /*
12000  * Check if a dev extent item is referred correctly by its chunk
12001  */
12002 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12003                                  struct extent_buffer *eb, int slot)
12004 {
12005         struct btrfs_root *chunk_root = fs_info->chunk_root;
12006         struct btrfs_dev_extent *ptr;
12007         struct btrfs_path path;
12008         struct btrfs_key chunk_key;
12009         struct btrfs_key devext_key;
12010         struct btrfs_chunk *chunk;
12011         struct extent_buffer *l;
12012         int num_stripes;
12013         u64 length;
12014         int i;
12015         int found_chunk = 0;
12016         int ret;
12017
12018         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12019         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12020         length = btrfs_dev_extent_length(eb, ptr);
12021
12022         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12023         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12024         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12025
12026         btrfs_init_path(&path);
12027         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12028         if (ret)
12029                 goto out;
12030
12031         l = path.nodes[0];
12032         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12033         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12034                                       chunk_key.offset);
12035         if (ret < 0)
12036                 goto out;
12037
12038         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12039                 goto out;
12040
12041         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12042         for (i = 0; i < num_stripes; i++) {
12043                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12044                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12045
12046                 if (devid == devext_key.objectid &&
12047                     offset == devext_key.offset) {
12048                         found_chunk = 1;
12049                         break;
12050                 }
12051         }
12052 out:
12053         btrfs_release_path(&path);
12054         if (!found_chunk) {
12055                 error(
12056                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12057                         devext_key.objectid, devext_key.offset, length);
12058                 return REFERENCER_MISSING;
12059         }
12060         return 0;
12061 }
12062
12063 /*
12064  * Check if the used space is correct with the dev item
12065  */
12066 static int check_dev_item(struct btrfs_fs_info *fs_info,
12067                           struct extent_buffer *eb, int slot)
12068 {
12069         struct btrfs_root *dev_root = fs_info->dev_root;
12070         struct btrfs_dev_item *dev_item;
12071         struct btrfs_path path;
12072         struct btrfs_key key;
12073         struct btrfs_dev_extent *ptr;
12074         u64 dev_id;
12075         u64 used;
12076         u64 total = 0;
12077         int ret;
12078
12079         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12080         dev_id = btrfs_device_id(eb, dev_item);
12081         used = btrfs_device_bytes_used(eb, dev_item);
12082
12083         key.objectid = dev_id;
12084         key.type = BTRFS_DEV_EXTENT_KEY;
12085         key.offset = 0;
12086
12087         btrfs_init_path(&path);
12088         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12089         if (ret < 0) {
12090                 btrfs_item_key_to_cpu(eb, &key, slot);
12091                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12092                         key.objectid, key.type, key.offset);
12093                 btrfs_release_path(&path);
12094                 return REFERENCER_MISSING;
12095         }
12096
12097         /* Iterate dev_extents to calculate the used space of a device */
12098         while (1) {
12099                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12100                         goto next;
12101
12102                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12103                 if (key.objectid > dev_id)
12104                         break;
12105                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12106                         goto next;
12107
12108                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12109                                      struct btrfs_dev_extent);
12110                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12111 next:
12112                 ret = btrfs_next_item(dev_root, &path);
12113                 if (ret)
12114                         break;
12115         }
12116         btrfs_release_path(&path);
12117
12118         if (used != total) {
12119                 btrfs_item_key_to_cpu(eb, &key, slot);
12120                 error(
12121 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12122                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12123                         BTRFS_DEV_EXTENT_KEY, dev_id);
12124                 return ACCOUNTING_MISMATCH;
12125         }
12126         return 0;
12127 }
12128
12129 /*
12130  * Check a block group item with its referener (chunk) and its used space
12131  * with extent/metadata item
12132  */
12133 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12134                                   struct extent_buffer *eb, int slot)
12135 {
12136         struct btrfs_root *extent_root = fs_info->extent_root;
12137         struct btrfs_root *chunk_root = fs_info->chunk_root;
12138         struct btrfs_block_group_item *bi;
12139         struct btrfs_block_group_item bg_item;
12140         struct btrfs_path path;
12141         struct btrfs_key bg_key;
12142         struct btrfs_key chunk_key;
12143         struct btrfs_key extent_key;
12144         struct btrfs_chunk *chunk;
12145         struct extent_buffer *leaf;
12146         struct btrfs_extent_item *ei;
12147         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12148         u64 flags;
12149         u64 bg_flags;
12150         u64 used;
12151         u64 total = 0;
12152         int ret;
12153         int err = 0;
12154
12155         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12156         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12157         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12158         used = btrfs_block_group_used(&bg_item);
12159         bg_flags = btrfs_block_group_flags(&bg_item);
12160
12161         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12162         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12163         chunk_key.offset = bg_key.objectid;
12164
12165         btrfs_init_path(&path);
12166         /* Search for the referencer chunk */
12167         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12168         if (ret) {
12169                 error(
12170                 "block group[%llu %llu] did not find the related chunk item",
12171                         bg_key.objectid, bg_key.offset);
12172                 err |= REFERENCER_MISSING;
12173         } else {
12174                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12175                                         struct btrfs_chunk);
12176                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12177                                                 bg_key.offset) {
12178                         error(
12179         "block group[%llu %llu] related chunk item length does not match",
12180                                 bg_key.objectid, bg_key.offset);
12181                         err |= REFERENCER_MISMATCH;
12182                 }
12183         }
12184         btrfs_release_path(&path);
12185
12186         /* Search from the block group bytenr */
12187         extent_key.objectid = bg_key.objectid;
12188         extent_key.type = 0;
12189         extent_key.offset = 0;
12190
12191         btrfs_init_path(&path);
12192         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12193         if (ret < 0)
12194                 goto out;
12195
12196         /* Iterate extent tree to account used space */
12197         while (1) {
12198                 leaf = path.nodes[0];
12199
12200                 /* Search slot can point to the last item beyond leaf nritems */
12201                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12202                         goto next;
12203
12204                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12205                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12206                         break;
12207
12208                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12209                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12210                         goto next;
12211                 if (extent_key.objectid < bg_key.objectid)
12212                         goto next;
12213
12214                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12215                         total += nodesize;
12216                 else
12217                         total += extent_key.offset;
12218
12219                 ei = btrfs_item_ptr(leaf, path.slots[0],
12220                                     struct btrfs_extent_item);
12221                 flags = btrfs_extent_flags(leaf, ei);
12222                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12223                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12224                                 error(
12225                         "bad extent[%llu, %llu) type mismatch with chunk",
12226                                         extent_key.objectid,
12227                                         extent_key.objectid + extent_key.offset);
12228                                 err |= CHUNK_TYPE_MISMATCH;
12229                         }
12230                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12231                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12232                                     BTRFS_BLOCK_GROUP_METADATA))) {
12233                                 error(
12234                         "bad extent[%llu, %llu) type mismatch with chunk",
12235                                         extent_key.objectid,
12236                                         extent_key.objectid + nodesize);
12237                                 err |= CHUNK_TYPE_MISMATCH;
12238                         }
12239                 }
12240 next:
12241                 ret = btrfs_next_item(extent_root, &path);
12242                 if (ret)
12243                         break;
12244         }
12245
12246 out:
12247         btrfs_release_path(&path);
12248
12249         if (total != used) {
12250                 error(
12251                 "block group[%llu %llu] used %llu but extent items used %llu",
12252                         bg_key.objectid, bg_key.offset, used, total);
12253                 err |= ACCOUNTING_MISMATCH;
12254         }
12255         return err;
12256 }
12257
12258 /*
12259  * Check a chunk item.
12260  * Including checking all referred dev_extents and block group
12261  */
12262 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12263                             struct extent_buffer *eb, int slot)
12264 {
12265         struct btrfs_root *extent_root = fs_info->extent_root;
12266         struct btrfs_root *dev_root = fs_info->dev_root;
12267         struct btrfs_path path;
12268         struct btrfs_key chunk_key;
12269         struct btrfs_key bg_key;
12270         struct btrfs_key devext_key;
12271         struct btrfs_chunk *chunk;
12272         struct extent_buffer *leaf;
12273         struct btrfs_block_group_item *bi;
12274         struct btrfs_block_group_item bg_item;
12275         struct btrfs_dev_extent *ptr;
12276         u64 length;
12277         u64 chunk_end;
12278         u64 stripe_len;
12279         u64 type;
12280         int num_stripes;
12281         u64 offset;
12282         u64 objectid;
12283         int i;
12284         int ret;
12285         int err = 0;
12286
12287         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12288         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12289         length = btrfs_chunk_length(eb, chunk);
12290         chunk_end = chunk_key.offset + length;
12291         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12292                                       chunk_key.offset);
12293         if (ret < 0) {
12294                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12295                         chunk_end);
12296                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12297                 goto out;
12298         }
12299         type = btrfs_chunk_type(eb, chunk);
12300
12301         bg_key.objectid = chunk_key.offset;
12302         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12303         bg_key.offset = length;
12304
12305         btrfs_init_path(&path);
12306         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12307         if (ret) {
12308                 error(
12309                 "chunk[%llu %llu) did not find the related block group item",
12310                         chunk_key.offset, chunk_end);
12311                 err |= REFERENCER_MISSING;
12312         } else{
12313                 leaf = path.nodes[0];
12314                 bi = btrfs_item_ptr(leaf, path.slots[0],
12315                                     struct btrfs_block_group_item);
12316                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12317                                    sizeof(bg_item));
12318                 if (btrfs_block_group_flags(&bg_item) != type) {
12319                         error(
12320 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12321                                 chunk_key.offset, chunk_end, type,
12322                                 btrfs_block_group_flags(&bg_item));
12323                         err |= REFERENCER_MISSING;
12324                 }
12325         }
12326
12327         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12328         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12329         for (i = 0; i < num_stripes; i++) {
12330                 btrfs_release_path(&path);
12331                 btrfs_init_path(&path);
12332                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12333                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12334                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12335
12336                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12337                                         0, 0);
12338                 if (ret)
12339                         goto not_match_dev;
12340
12341                 leaf = path.nodes[0];
12342                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12343                                      struct btrfs_dev_extent);
12344                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12345                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12346                 if (objectid != chunk_key.objectid ||
12347                     offset != chunk_key.offset ||
12348                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12349                         goto not_match_dev;
12350                 continue;
12351 not_match_dev:
12352                 err |= BACKREF_MISSING;
12353                 error(
12354                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12355                         chunk_key.objectid, chunk_end, i);
12356                 continue;
12357         }
12358         btrfs_release_path(&path);
12359 out:
12360         return err;
12361 }
12362
12363 /*
12364  * Main entry function to check known items and update related accounting info
12365  */
12366 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
12367 {
12368         struct btrfs_fs_info *fs_info = root->fs_info;
12369         struct btrfs_key key;
12370         int slot = 0;
12371         int type;
12372         struct btrfs_extent_data_ref *dref;
12373         int ret;
12374         int err = 0;
12375
12376 next:
12377         btrfs_item_key_to_cpu(eb, &key, slot);
12378         type = key.type;
12379
12380         switch (type) {
12381         case BTRFS_EXTENT_DATA_KEY:
12382                 ret = check_extent_data_item(root, eb, slot);
12383                 err |= ret;
12384                 break;
12385         case BTRFS_BLOCK_GROUP_ITEM_KEY:
12386                 ret = check_block_group_item(fs_info, eb, slot);
12387                 err |= ret;
12388                 break;
12389         case BTRFS_DEV_ITEM_KEY:
12390                 ret = check_dev_item(fs_info, eb, slot);
12391                 err |= ret;
12392                 break;
12393         case BTRFS_CHUNK_ITEM_KEY:
12394                 ret = check_chunk_item(fs_info, eb, slot);
12395                 err |= ret;
12396                 break;
12397         case BTRFS_DEV_EXTENT_KEY:
12398                 ret = check_dev_extent_item(fs_info, eb, slot);
12399                 err |= ret;
12400                 break;
12401         case BTRFS_EXTENT_ITEM_KEY:
12402         case BTRFS_METADATA_ITEM_KEY:
12403                 ret = check_extent_item(fs_info, eb, slot);
12404                 err |= ret;
12405                 break;
12406         case BTRFS_EXTENT_CSUM_KEY:
12407                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12408                 break;
12409         case BTRFS_TREE_BLOCK_REF_KEY:
12410                 ret = check_tree_block_backref(fs_info, key.offset,
12411                                                key.objectid, -1);
12412                 err |= ret;
12413                 break;
12414         case BTRFS_EXTENT_DATA_REF_KEY:
12415                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12416                 ret = check_extent_data_backref(fs_info,
12417                                 btrfs_extent_data_ref_root(eb, dref),
12418                                 btrfs_extent_data_ref_objectid(eb, dref),
12419                                 btrfs_extent_data_ref_offset(eb, dref),
12420                                 key.objectid, 0,
12421                                 btrfs_extent_data_ref_count(eb, dref));
12422                 err |= ret;
12423                 break;
12424         case BTRFS_SHARED_BLOCK_REF_KEY:
12425                 ret = check_shared_block_backref(fs_info, key.offset,
12426                                                  key.objectid, -1);
12427                 err |= ret;
12428                 break;
12429         case BTRFS_SHARED_DATA_REF_KEY:
12430                 ret = check_shared_data_backref(fs_info, key.offset,
12431                                                 key.objectid);
12432                 err |= ret;
12433                 break;
12434         default:
12435                 break;
12436         }
12437
12438         if (++slot < btrfs_header_nritems(eb))
12439                 goto next;
12440
12441         return err;
12442 }
12443
12444 /*
12445  * Helper function for later fs/subvol tree check.  To determine if a tree
12446  * block should be checked.
12447  * This function will ensure only the direct referencer with lowest rootid to
12448  * check a fs/subvolume tree block.
12449  *
12450  * Backref check at extent tree would detect errors like missing subvolume
12451  * tree, so we can do aggressive check to reduce duplicated checks.
12452  */
12453 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
12454 {
12455         struct btrfs_root *extent_root = root->fs_info->extent_root;
12456         struct btrfs_key key;
12457         struct btrfs_path path;
12458         struct extent_buffer *leaf;
12459         int slot;
12460         struct btrfs_extent_item *ei;
12461         unsigned long ptr;
12462         unsigned long end;
12463         int type;
12464         u32 item_size;
12465         u64 offset;
12466         struct btrfs_extent_inline_ref *iref;
12467         int ret;
12468
12469         btrfs_init_path(&path);
12470         key.objectid = btrfs_header_bytenr(eb);
12471         key.type = BTRFS_METADATA_ITEM_KEY;
12472         key.offset = (u64)-1;
12473
12474         /*
12475          * Any failure in backref resolving means we can't determine
12476          * whom the tree block belongs to.
12477          * So in that case, we need to check that tree block
12478          */
12479         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12480         if (ret < 0)
12481                 goto need_check;
12482
12483         ret = btrfs_previous_extent_item(extent_root, &path,
12484                                          btrfs_header_bytenr(eb));
12485         if (ret)
12486                 goto need_check;
12487
12488         leaf = path.nodes[0];
12489         slot = path.slots[0];
12490         btrfs_item_key_to_cpu(leaf, &key, slot);
12491         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12492
12493         if (key.type == BTRFS_METADATA_ITEM_KEY) {
12494                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12495         } else {
12496                 struct btrfs_tree_block_info *info;
12497
12498                 info = (struct btrfs_tree_block_info *)(ei + 1);
12499                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
12500         }
12501
12502         item_size = btrfs_item_size_nr(leaf, slot);
12503         ptr = (unsigned long)iref;
12504         end = (unsigned long)ei + item_size;
12505         while (ptr < end) {
12506                 iref = (struct btrfs_extent_inline_ref *)ptr;
12507                 type = btrfs_extent_inline_ref_type(leaf, iref);
12508                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
12509
12510                 /*
12511                  * We only check the tree block if current root is
12512                  * the lowest referencer of it.
12513                  */
12514                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
12515                     offset < root->objectid) {
12516                         btrfs_release_path(&path);
12517                         return 0;
12518                 }
12519
12520                 ptr += btrfs_extent_inline_ref_size(type);
12521         }
12522         /*
12523          * Normally we should also check keyed tree block ref, but that may be
12524          * very time consuming.  Inlined ref should already make us skip a lot
12525          * of refs now.  So skip search keyed tree block ref.
12526          */
12527
12528 need_check:
12529         btrfs_release_path(&path);
12530         return 1;
12531 }
12532
12533 /*
12534  * Traversal function for tree block. We will do:
12535  * 1) Skip shared fs/subvolume tree blocks
12536  * 2) Update related bytes accounting
12537  * 3) Pre-order traversal
12538  */
12539 static int traverse_tree_block(struct btrfs_root *root,
12540                                 struct extent_buffer *node)
12541 {
12542         struct extent_buffer *eb;
12543         struct btrfs_key key;
12544         struct btrfs_key drop_key;
12545         int level;
12546         u64 nr;
12547         int i;
12548         int err = 0;
12549         int ret;
12550
12551         /*
12552          * Skip shared fs/subvolume tree block, in that case they will
12553          * be checked by referencer with lowest rootid
12554          */
12555         if (is_fstree(root->objectid) && !should_check(root, node))
12556                 return 0;
12557
12558         /* Update bytes accounting */
12559         total_btree_bytes += node->len;
12560         if (fs_root_objectid(btrfs_header_owner(node)))
12561                 total_fs_tree_bytes += node->len;
12562         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
12563                 total_extent_tree_bytes += node->len;
12564
12565         /* pre-order tranversal, check itself first */
12566         level = btrfs_header_level(node);
12567         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
12568                                    btrfs_header_level(node),
12569                                    btrfs_header_owner(node));
12570         err |= ret;
12571         if (err)
12572                 error(
12573         "check %s failed root %llu bytenr %llu level %d, force continue check",
12574                         level ? "node":"leaf", root->objectid,
12575                         btrfs_header_bytenr(node), btrfs_header_level(node));
12576
12577         if (!level) {
12578                 btree_space_waste += btrfs_leaf_free_space(root, node);
12579                 ret = check_leaf_items(root, node);
12580                 err |= ret;
12581                 return err;
12582         }
12583
12584         nr = btrfs_header_nritems(node);
12585         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
12586         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
12587                 sizeof(struct btrfs_key_ptr);
12588
12589         /* Then check all its children */
12590         for (i = 0; i < nr; i++) {
12591                 u64 blocknr = btrfs_node_blockptr(node, i);
12592
12593                 btrfs_node_key_to_cpu(node, &key, i);
12594                 if (level == root->root_item.drop_level &&
12595                     is_dropped_key(&key, &drop_key))
12596                         continue;
12597
12598                 /*
12599                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
12600                  * to call the function itself.
12601                  */
12602                 eb = read_tree_block(root->fs_info, blocknr, 0);
12603                 if (extent_buffer_uptodate(eb)) {
12604                         ret = traverse_tree_block(root, eb);
12605                         err |= ret;
12606                 }
12607                 free_extent_buffer(eb);
12608         }
12609
12610         return err;
12611 }
12612
12613 /*
12614  * Low memory usage version check_chunks_and_extents.
12615  */
12616 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12617 {
12618         struct btrfs_path path;
12619         struct btrfs_key key;
12620         struct btrfs_root *root1;
12621         struct btrfs_root *root;
12622         struct btrfs_root *cur_root;
12623         int err = 0;
12624         int ret;
12625
12626         root = fs_info->fs_root;
12627
12628         root1 = root->fs_info->chunk_root;
12629         ret = traverse_tree_block(root1, root1->node);
12630         err |= ret;
12631
12632         root1 = root->fs_info->tree_root;
12633         ret = traverse_tree_block(root1, root1->node);
12634         err |= ret;
12635
12636         btrfs_init_path(&path);
12637         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12638         key.offset = 0;
12639         key.type = BTRFS_ROOT_ITEM_KEY;
12640
12641         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12642         if (ret) {
12643                 error("cannot find extent treet in tree_root");
12644                 goto out;
12645         }
12646
12647         while (1) {
12648                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12649                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12650                         goto next;
12651                 key.offset = (u64)-1;
12652
12653                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12654                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12655                                         &key);
12656                 else
12657                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
12658                 if (IS_ERR(cur_root) || !cur_root) {
12659                         error("failed to read tree: %lld", key.objectid);
12660                         goto next;
12661                 }
12662
12663                 ret = traverse_tree_block(cur_root, cur_root->node);
12664                 err |= ret;
12665
12666                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12667                         btrfs_free_fs_root(cur_root);
12668 next:
12669                 ret = btrfs_next_item(root1, &path);
12670                 if (ret)
12671                         goto out;
12672         }
12673
12674 out:
12675         btrfs_release_path(&path);
12676         return err;
12677 }
12678
12679 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12680 {
12681         int ret;
12682
12683         if (!ctx.progress_enabled)
12684                 fprintf(stderr, "checking extents\n");
12685         if (check_mode == CHECK_MODE_LOWMEM)
12686                 ret = check_chunks_and_extents_v2(fs_info);
12687         else
12688                 ret = check_chunks_and_extents(fs_info);
12689
12690         return ret;
12691 }
12692
12693 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12694                            struct btrfs_root *root, int overwrite)
12695 {
12696         struct extent_buffer *c;
12697         struct extent_buffer *old = root->node;
12698         int level;
12699         int ret;
12700         struct btrfs_disk_key disk_key = {0,0,0};
12701
12702         level = 0;
12703
12704         if (overwrite) {
12705                 c = old;
12706                 extent_buffer_get(c);
12707                 goto init;
12708         }
12709         c = btrfs_alloc_free_block(trans, root,
12710                                    root->fs_info->nodesize,
12711                                    root->root_key.objectid,
12712                                    &disk_key, level, 0, 0);
12713         if (IS_ERR(c)) {
12714                 c = old;
12715                 extent_buffer_get(c);
12716                 overwrite = 1;
12717         }
12718 init:
12719         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12720         btrfs_set_header_level(c, level);
12721         btrfs_set_header_bytenr(c, c->start);
12722         btrfs_set_header_generation(c, trans->transid);
12723         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12724         btrfs_set_header_owner(c, root->root_key.objectid);
12725
12726         write_extent_buffer(c, root->fs_info->fsid,
12727                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
12728
12729         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12730                             btrfs_header_chunk_tree_uuid(c),
12731                             BTRFS_UUID_SIZE);
12732
12733         btrfs_mark_buffer_dirty(c);
12734         /*
12735          * this case can happen in the following case:
12736          *
12737          * 1.overwrite previous root.
12738          *
12739          * 2.reinit reloc data root, this is because we skip pin
12740          * down reloc data tree before which means we can allocate
12741          * same block bytenr here.
12742          */
12743         if (old->start == c->start) {
12744                 btrfs_set_root_generation(&root->root_item,
12745                                           trans->transid);
12746                 root->root_item.level = btrfs_header_level(root->node);
12747                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12748                                         &root->root_key, &root->root_item);
12749                 if (ret) {
12750                         free_extent_buffer(c);
12751                         return ret;
12752                 }
12753         }
12754         free_extent_buffer(old);
12755         root->node = c;
12756         add_root_to_dirty_list(root);
12757         return 0;
12758 }
12759
12760 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12761                                 struct extent_buffer *eb, int tree_root)
12762 {
12763         struct extent_buffer *tmp;
12764         struct btrfs_root_item *ri;
12765         struct btrfs_key key;
12766         u64 bytenr;
12767         int level = btrfs_header_level(eb);
12768         int nritems;
12769         int ret;
12770         int i;
12771
12772         /*
12773          * If we have pinned this block before, don't pin it again.
12774          * This can not only avoid forever loop with broken filesystem
12775          * but also give us some speedups.
12776          */
12777         if (test_range_bit(&fs_info->pinned_extents, eb->start,
12778                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12779                 return 0;
12780
12781         btrfs_pin_extent(fs_info, eb->start, eb->len);
12782
12783         nritems = btrfs_header_nritems(eb);
12784         for (i = 0; i < nritems; i++) {
12785                 if (level == 0) {
12786                         btrfs_item_key_to_cpu(eb, &key, i);
12787                         if (key.type != BTRFS_ROOT_ITEM_KEY)
12788                                 continue;
12789                         /* Skip the extent root and reloc roots */
12790                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12791                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12792                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12793                                 continue;
12794                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12795                         bytenr = btrfs_disk_root_bytenr(eb, ri);
12796
12797                         /*
12798                          * If at any point we start needing the real root we
12799                          * will have to build a stump root for the root we are
12800                          * in, but for now this doesn't actually use the root so
12801                          * just pass in extent_root.
12802                          */
12803                         tmp = read_tree_block(fs_info, bytenr, 0);
12804                         if (!extent_buffer_uptodate(tmp)) {
12805                                 fprintf(stderr, "Error reading root block\n");
12806                                 return -EIO;
12807                         }
12808                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
12809                         free_extent_buffer(tmp);
12810                         if (ret)
12811                                 return ret;
12812                 } else {
12813                         bytenr = btrfs_node_blockptr(eb, i);
12814
12815                         /* If we aren't the tree root don't read the block */
12816                         if (level == 1 && !tree_root) {
12817                                 btrfs_pin_extent(fs_info, bytenr,
12818                                                 fs_info->nodesize);
12819                                 continue;
12820                         }
12821
12822                         tmp = read_tree_block(fs_info, bytenr, 0);
12823                         if (!extent_buffer_uptodate(tmp)) {
12824                                 fprintf(stderr, "Error reading tree block\n");
12825                                 return -EIO;
12826                         }
12827                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12828                         free_extent_buffer(tmp);
12829                         if (ret)
12830                                 return ret;
12831                 }
12832         }
12833
12834         return 0;
12835 }
12836
12837 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12838 {
12839         int ret;
12840
12841         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12842         if (ret)
12843                 return ret;
12844
12845         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
12846 }
12847
12848 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12849 {
12850         struct btrfs_block_group_cache *cache;
12851         struct btrfs_path path;
12852         struct extent_buffer *leaf;
12853         struct btrfs_chunk *chunk;
12854         struct btrfs_key key;
12855         int ret;
12856         u64 start;
12857
12858         btrfs_init_path(&path);
12859         key.objectid = 0;
12860         key.type = BTRFS_CHUNK_ITEM_KEY;
12861         key.offset = 0;
12862         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12863         if (ret < 0) {
12864                 btrfs_release_path(&path);
12865                 return ret;
12866         }
12867
12868         /*
12869          * We do this in case the block groups were screwed up and had alloc
12870          * bits that aren't actually set on the chunks.  This happens with
12871          * restored images every time and could happen in real life I guess.
12872          */
12873         fs_info->avail_data_alloc_bits = 0;
12874         fs_info->avail_metadata_alloc_bits = 0;
12875         fs_info->avail_system_alloc_bits = 0;
12876
12877         /* First we need to create the in-memory block groups */
12878         while (1) {
12879                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12880                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12881                         if (ret < 0) {
12882                                 btrfs_release_path(&path);
12883                                 return ret;
12884                         }
12885                         if (ret) {
12886                                 ret = 0;
12887                                 break;
12888                         }
12889                 }
12890                 leaf = path.nodes[0];
12891                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12892                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12893                         path.slots[0]++;
12894                         continue;
12895                 }
12896
12897                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12898                 btrfs_add_block_group(fs_info, 0,
12899                                       btrfs_chunk_type(leaf, chunk),
12900                                       key.objectid, key.offset,
12901                                       btrfs_chunk_length(leaf, chunk));
12902                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12903                                  key.offset + btrfs_chunk_length(leaf, chunk));
12904                 path.slots[0]++;
12905         }
12906         start = 0;
12907         while (1) {
12908                 cache = btrfs_lookup_first_block_group(fs_info, start);
12909                 if (!cache)
12910                         break;
12911                 cache->cached = 1;
12912                 start = cache->key.objectid + cache->key.offset;
12913         }
12914
12915         btrfs_release_path(&path);
12916         return 0;
12917 }
12918
12919 static int reset_balance(struct btrfs_trans_handle *trans,
12920                          struct btrfs_fs_info *fs_info)
12921 {
12922         struct btrfs_root *root = fs_info->tree_root;
12923         struct btrfs_path path;
12924         struct extent_buffer *leaf;
12925         struct btrfs_key key;
12926         int del_slot, del_nr = 0;
12927         int ret;
12928         int found = 0;
12929
12930         btrfs_init_path(&path);
12931         key.objectid = BTRFS_BALANCE_OBJECTID;
12932         key.type = BTRFS_BALANCE_ITEM_KEY;
12933         key.offset = 0;
12934         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12935         if (ret) {
12936                 if (ret > 0)
12937                         ret = 0;
12938                 if (!ret)
12939                         goto reinit_data_reloc;
12940                 else
12941                         goto out;
12942         }
12943
12944         ret = btrfs_del_item(trans, root, &path);
12945         if (ret)
12946                 goto out;
12947         btrfs_release_path(&path);
12948
12949         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12950         key.type = BTRFS_ROOT_ITEM_KEY;
12951         key.offset = 0;
12952         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12953         if (ret < 0)
12954                 goto out;
12955         while (1) {
12956                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12957                         if (!found)
12958                                 break;
12959
12960                         if (del_nr) {
12961                                 ret = btrfs_del_items(trans, root, &path,
12962                                                       del_slot, del_nr);
12963                                 del_nr = 0;
12964                                 if (ret)
12965                                         goto out;
12966                         }
12967                         key.offset++;
12968                         btrfs_release_path(&path);
12969
12970                         found = 0;
12971                         ret = btrfs_search_slot(trans, root, &key, &path,
12972                                                 -1, 1);
12973                         if (ret < 0)
12974                                 goto out;
12975                         continue;
12976                 }
12977                 found = 1;
12978                 leaf = path.nodes[0];
12979                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12980                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12981                         break;
12982                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12983                         path.slots[0]++;
12984                         continue;
12985                 }
12986                 if (!del_nr) {
12987                         del_slot = path.slots[0];
12988                         del_nr = 1;
12989                 } else {
12990                         del_nr++;
12991                 }
12992                 path.slots[0]++;
12993         }
12994
12995         if (del_nr) {
12996                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12997                 if (ret)
12998                         goto out;
12999         }
13000         btrfs_release_path(&path);
13001
13002 reinit_data_reloc:
13003         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13004         key.type = BTRFS_ROOT_ITEM_KEY;
13005         key.offset = (u64)-1;
13006         root = btrfs_read_fs_root(fs_info, &key);
13007         if (IS_ERR(root)) {
13008                 fprintf(stderr, "Error reading data reloc tree\n");
13009                 ret = PTR_ERR(root);
13010                 goto out;
13011         }
13012         record_root_in_trans(trans, root);
13013         ret = btrfs_fsck_reinit_root(trans, root, 0);
13014         if (ret)
13015                 goto out;
13016         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13017 out:
13018         btrfs_release_path(&path);
13019         return ret;
13020 }
13021
13022 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13023                               struct btrfs_fs_info *fs_info)
13024 {
13025         u64 start = 0;
13026         int ret;
13027
13028         /*
13029          * The only reason we don't do this is because right now we're just
13030          * walking the trees we find and pinning down their bytes, we don't look
13031          * at any of the leaves.  In order to do mixed groups we'd have to check
13032          * the leaves of any fs roots and pin down the bytes for any file
13033          * extents we find.  Not hard but why do it if we don't have to?
13034          */
13035         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13036                 fprintf(stderr, "We don't support re-initing the extent tree "
13037                         "for mixed block groups yet, please notify a btrfs "
13038                         "developer you want to do this so they can add this "
13039                         "functionality.\n");
13040                 return -EINVAL;
13041         }
13042
13043         /*
13044          * first we need to walk all of the trees except the extent tree and pin
13045          * down the bytes that are in use so we don't overwrite any existing
13046          * metadata.
13047          */
13048         ret = pin_metadata_blocks(fs_info);
13049         if (ret) {
13050                 fprintf(stderr, "error pinning down used bytes\n");
13051                 return ret;
13052         }
13053
13054         /*
13055          * Need to drop all the block groups since we're going to recreate all
13056          * of them again.
13057          */
13058         btrfs_free_block_groups(fs_info);
13059         ret = reset_block_groups(fs_info);
13060         if (ret) {
13061                 fprintf(stderr, "error resetting the block groups\n");
13062                 return ret;
13063         }
13064
13065         /* Ok we can allocate now, reinit the extent root */
13066         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13067         if (ret) {
13068                 fprintf(stderr, "extent root initialization failed\n");
13069                 /*
13070                  * When the transaction code is updated we should end the
13071                  * transaction, but for now progs only knows about commit so
13072                  * just return an error.
13073                  */
13074                 return ret;
13075         }
13076
13077         /*
13078          * Now we have all the in-memory block groups setup so we can make
13079          * allocations properly, and the metadata we care about is safe since we
13080          * pinned all of it above.
13081          */
13082         while (1) {
13083                 struct btrfs_block_group_cache *cache;
13084
13085                 cache = btrfs_lookup_first_block_group(fs_info, start);
13086                 if (!cache)
13087                         break;
13088                 start = cache->key.objectid + cache->key.offset;
13089                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13090                                         &cache->key, &cache->item,
13091                                         sizeof(cache->item));
13092                 if (ret) {
13093                         fprintf(stderr, "Error adding block group\n");
13094                         return ret;
13095                 }
13096                 btrfs_extent_post_op(trans, fs_info->extent_root);
13097         }
13098
13099         ret = reset_balance(trans, fs_info);
13100         if (ret)
13101                 fprintf(stderr, "error resetting the pending balance\n");
13102
13103         return ret;
13104 }
13105
13106 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13107 {
13108         struct btrfs_path path;
13109         struct btrfs_trans_handle *trans;
13110         struct btrfs_key key;
13111         int ret;
13112
13113         printf("Recowing metadata block %llu\n", eb->start);
13114         key.objectid = btrfs_header_owner(eb);
13115         key.type = BTRFS_ROOT_ITEM_KEY;
13116         key.offset = (u64)-1;
13117
13118         root = btrfs_read_fs_root(root->fs_info, &key);
13119         if (IS_ERR(root)) {
13120                 fprintf(stderr, "Couldn't find owner root %llu\n",
13121                         key.objectid);
13122                 return PTR_ERR(root);
13123         }
13124
13125         trans = btrfs_start_transaction(root, 1);
13126         if (IS_ERR(trans))
13127                 return PTR_ERR(trans);
13128
13129         btrfs_init_path(&path);
13130         path.lowest_level = btrfs_header_level(eb);
13131         if (path.lowest_level)
13132                 btrfs_node_key_to_cpu(eb, &key, 0);
13133         else
13134                 btrfs_item_key_to_cpu(eb, &key, 0);
13135
13136         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13137         btrfs_commit_transaction(trans, root);
13138         btrfs_release_path(&path);
13139         return ret;
13140 }
13141
13142 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13143 {
13144         struct btrfs_path path;
13145         struct btrfs_trans_handle *trans;
13146         struct btrfs_key key;
13147         int ret;
13148
13149         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13150                bad->key.type, bad->key.offset);
13151         key.objectid = bad->root_id;
13152         key.type = BTRFS_ROOT_ITEM_KEY;
13153         key.offset = (u64)-1;
13154
13155         root = btrfs_read_fs_root(root->fs_info, &key);
13156         if (IS_ERR(root)) {
13157                 fprintf(stderr, "Couldn't find owner root %llu\n",
13158                         key.objectid);
13159                 return PTR_ERR(root);
13160         }
13161
13162         trans = btrfs_start_transaction(root, 1);
13163         if (IS_ERR(trans))
13164                 return PTR_ERR(trans);
13165
13166         btrfs_init_path(&path);
13167         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13168         if (ret) {
13169                 if (ret > 0)
13170                         ret = 0;
13171                 goto out;
13172         }
13173         ret = btrfs_del_item(trans, root, &path);
13174 out:
13175         btrfs_commit_transaction(trans, root);
13176         btrfs_release_path(&path);
13177         return ret;
13178 }
13179
13180 static int zero_log_tree(struct btrfs_root *root)
13181 {
13182         struct btrfs_trans_handle *trans;
13183         int ret;
13184
13185         trans = btrfs_start_transaction(root, 1);
13186         if (IS_ERR(trans)) {
13187                 ret = PTR_ERR(trans);
13188                 return ret;
13189         }
13190         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13191         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13192         ret = btrfs_commit_transaction(trans, root);
13193         return ret;
13194 }
13195
13196 static int populate_csum(struct btrfs_trans_handle *trans,
13197                          struct btrfs_root *csum_root, char *buf, u64 start,
13198                          u64 len)
13199 {
13200         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13201         u64 offset = 0;
13202         u64 sectorsize;
13203         int ret = 0;
13204
13205         while (offset < len) {
13206                 sectorsize = fs_info->sectorsize;
13207                 ret = read_extent_data(fs_info, buf, start + offset,
13208                                        &sectorsize, 0);
13209                 if (ret)
13210                         break;
13211                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13212                                             start + offset, buf, sectorsize);
13213                 if (ret)
13214                         break;
13215                 offset += sectorsize;
13216         }
13217         return ret;
13218 }
13219
13220 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13221                                       struct btrfs_root *csum_root,
13222                                       struct btrfs_root *cur_root)
13223 {
13224         struct btrfs_path path;
13225         struct btrfs_key key;
13226         struct extent_buffer *node;
13227         struct btrfs_file_extent_item *fi;
13228         char *buf = NULL;
13229         u64 start = 0;
13230         u64 len = 0;
13231         int slot = 0;
13232         int ret = 0;
13233
13234         buf = malloc(cur_root->fs_info->sectorsize);
13235         if (!buf)
13236                 return -ENOMEM;
13237
13238         btrfs_init_path(&path);
13239         key.objectid = 0;
13240         key.offset = 0;
13241         key.type = 0;
13242         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13243         if (ret < 0)
13244                 goto out;
13245         /* Iterate all regular file extents and fill its csum */
13246         while (1) {
13247                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13248
13249                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13250                         goto next;
13251                 node = path.nodes[0];
13252                 slot = path.slots[0];
13253                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13254                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13255                         goto next;
13256                 start = btrfs_file_extent_disk_bytenr(node, fi);
13257                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13258
13259                 ret = populate_csum(trans, csum_root, buf, start, len);
13260                 if (ret == -EEXIST)
13261                         ret = 0;
13262                 if (ret < 0)
13263                         goto out;
13264 next:
13265                 /*
13266                  * TODO: if next leaf is corrupted, jump to nearest next valid
13267                  * leaf.
13268                  */
13269                 ret = btrfs_next_item(cur_root, &path);
13270                 if (ret < 0)
13271                         goto out;
13272                 if (ret > 0) {
13273                         ret = 0;
13274                         goto out;
13275                 }
13276         }
13277
13278 out:
13279         btrfs_release_path(&path);
13280         free(buf);
13281         return ret;
13282 }
13283
13284 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13285                                   struct btrfs_root *csum_root)
13286 {
13287         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13288         struct btrfs_path path;
13289         struct btrfs_root *tree_root = fs_info->tree_root;
13290         struct btrfs_root *cur_root;
13291         struct extent_buffer *node;
13292         struct btrfs_key key;
13293         int slot = 0;
13294         int ret = 0;
13295
13296         btrfs_init_path(&path);
13297         key.objectid = BTRFS_FS_TREE_OBJECTID;
13298         key.offset = 0;
13299         key.type = BTRFS_ROOT_ITEM_KEY;
13300         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13301         if (ret < 0)
13302                 goto out;
13303         if (ret > 0) {
13304                 ret = -ENOENT;
13305                 goto out;
13306         }
13307
13308         while (1) {
13309                 node = path.nodes[0];
13310                 slot = path.slots[0];
13311                 btrfs_item_key_to_cpu(node, &key, slot);
13312                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13313                         goto out;
13314                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13315                         goto next;
13316                 if (!is_fstree(key.objectid))
13317                         goto next;
13318                 key.offset = (u64)-1;
13319
13320                 cur_root = btrfs_read_fs_root(fs_info, &key);
13321                 if (IS_ERR(cur_root) || !cur_root) {
13322                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13323                                 key.objectid);
13324                         goto out;
13325                 }
13326                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13327                                 cur_root);
13328                 if (ret < 0)
13329                         goto out;
13330 next:
13331                 ret = btrfs_next_item(tree_root, &path);
13332                 if (ret > 0) {
13333                         ret = 0;
13334                         goto out;
13335                 }
13336                 if (ret < 0)
13337                         goto out;
13338         }
13339
13340 out:
13341         btrfs_release_path(&path);
13342         return ret;
13343 }
13344
13345 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13346                                       struct btrfs_root *csum_root)
13347 {
13348         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13349         struct btrfs_path path;
13350         struct btrfs_extent_item *ei;
13351         struct extent_buffer *leaf;
13352         char *buf;
13353         struct btrfs_key key;
13354         int ret;
13355
13356         btrfs_init_path(&path);
13357         key.objectid = 0;
13358         key.type = BTRFS_EXTENT_ITEM_KEY;
13359         key.offset = 0;
13360         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13361         if (ret < 0) {
13362                 btrfs_release_path(&path);
13363                 return ret;
13364         }
13365
13366         buf = malloc(csum_root->fs_info->sectorsize);
13367         if (!buf) {
13368                 btrfs_release_path(&path);
13369                 return -ENOMEM;
13370         }
13371
13372         while (1) {
13373                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13374                         ret = btrfs_next_leaf(extent_root, &path);
13375                         if (ret < 0)
13376                                 break;
13377                         if (ret) {
13378                                 ret = 0;
13379                                 break;
13380                         }
13381                 }
13382                 leaf = path.nodes[0];
13383
13384                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13385                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13386                         path.slots[0]++;
13387                         continue;
13388                 }
13389
13390                 ei = btrfs_item_ptr(leaf, path.slots[0],
13391                                     struct btrfs_extent_item);
13392                 if (!(btrfs_extent_flags(leaf, ei) &
13393                       BTRFS_EXTENT_FLAG_DATA)) {
13394                         path.slots[0]++;
13395                         continue;
13396                 }
13397
13398                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13399                                     key.offset);
13400                 if (ret)
13401                         break;
13402                 path.slots[0]++;
13403         }
13404
13405         btrfs_release_path(&path);
13406         free(buf);
13407         return ret;
13408 }
13409
/*
 * Recompute all data checksums and store them in the csum tree.
 *
 * Re-initializing the extent tree wipes every extent record, so in that case
 * the extent tree cannot serve as the source of data extents.  When
 * @search_fs_tree is non-zero the fs/subvolume trees are scanned instead;
 * otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
                              : fill_csum_tree_from_extent(trans, csum_root);
}
13426
13427 static void free_roots_info_cache(void)
13428 {
13429         if (!roots_info_cache)
13430                 return;
13431
13432         while (!cache_tree_empty(roots_info_cache)) {
13433                 struct cache_extent *entry;
13434                 struct root_item_info *rii;
13435
13436                 entry = first_cache_extent(roots_info_cache);
13437                 if (!entry)
13438                         break;
13439                 remove_cache_extent(roots_info_cache, entry);
13440                 rii = container_of(entry, struct root_item_info, cache_extent);
13441                 free(rii);
13442         }
13443
13444         free(roots_info_cache);
13445         roots_info_cache = NULL;
13446 }
13447
13448 static int build_roots_info_cache(struct btrfs_fs_info *info)
13449 {
13450         int ret = 0;
13451         struct btrfs_key key;
13452         struct extent_buffer *leaf;
13453         struct btrfs_path path;
13454
13455         if (!roots_info_cache) {
13456                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13457                 if (!roots_info_cache)
13458                         return -ENOMEM;
13459                 cache_tree_init(roots_info_cache);
13460         }
13461
13462         btrfs_init_path(&path);
13463         key.objectid = 0;
13464         key.type = BTRFS_EXTENT_ITEM_KEY;
13465         key.offset = 0;
13466         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13467         if (ret < 0)
13468                 goto out;
13469         leaf = path.nodes[0];
13470
13471         while (1) {
13472                 struct btrfs_key found_key;
13473                 struct btrfs_extent_item *ei;
13474                 struct btrfs_extent_inline_ref *iref;
13475                 int slot = path.slots[0];
13476                 int type;
13477                 u64 flags;
13478                 u64 root_id;
13479                 u8 level;
13480                 struct cache_extent *entry;
13481                 struct root_item_info *rii;
13482
13483                 if (slot >= btrfs_header_nritems(leaf)) {
13484                         ret = btrfs_next_leaf(info->extent_root, &path);
13485                         if (ret < 0) {
13486                                 break;
13487                         } else if (ret) {
13488                                 ret = 0;
13489                                 break;
13490                         }
13491                         leaf = path.nodes[0];
13492                         slot = path.slots[0];
13493                 }
13494
13495                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13496
13497                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13498                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13499                         goto next;
13500
13501                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13502                 flags = btrfs_extent_flags(leaf, ei);
13503
13504                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13505                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13506                         goto next;
13507
13508                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13509                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13510                         level = found_key.offset;
13511                 } else {
13512                         struct btrfs_tree_block_info *binfo;
13513
13514                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13515                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13516                         level = btrfs_tree_block_level(leaf, binfo);
13517                 }
13518
13519                 /*
13520                  * For a root extent, it must be of the following type and the
13521                  * first (and only one) iref in the item.
13522                  */
13523                 type = btrfs_extent_inline_ref_type(leaf, iref);
13524                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13525                         goto next;
13526
13527                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13528                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13529                 if (!entry) {
13530                         rii = malloc(sizeof(struct root_item_info));
13531                         if (!rii) {
13532                                 ret = -ENOMEM;
13533                                 goto out;
13534                         }
13535                         rii->cache_extent.start = root_id;
13536                         rii->cache_extent.size = 1;
13537                         rii->level = (u8)-1;
13538                         entry = &rii->cache_extent;
13539                         ret = insert_cache_extent(roots_info_cache, entry);
13540                         ASSERT(ret == 0);
13541                 } else {
13542                         rii = container_of(entry, struct root_item_info,
13543                                            cache_extent);
13544                 }
13545
13546                 ASSERT(rii->cache_extent.start == root_id);
13547                 ASSERT(rii->cache_extent.size == 1);
13548
13549                 if (level > rii->level || rii->level == (u8)-1) {
13550                         rii->level = level;
13551                         rii->bytenr = found_key.objectid;
13552                         rii->gen = btrfs_extent_generation(leaf, ei);
13553                         rii->node_count = 1;
13554                 } else if (level == rii->level) {
13555                         rii->node_count++;
13556                 }
13557 next:
13558                 path.slots[0]++;
13559         }
13560
13561 out:
13562         btrfs_release_path(&path);
13563
13564         return ret;
13565 }
13566
13567 static int maybe_repair_root_item(struct btrfs_path *path,
13568                                   const struct btrfs_key *root_key,
13569                                   const int read_only_mode)
13570 {
13571         const u64 root_id = root_key->objectid;
13572         struct cache_extent *entry;
13573         struct root_item_info *rii;
13574         struct btrfs_root_item ri;
13575         unsigned long offset;
13576
13577         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13578         if (!entry) {
13579                 fprintf(stderr,
13580                         "Error: could not find extent items for root %llu\n",
13581                         root_key->objectid);
13582                 return -ENOENT;
13583         }
13584
13585         rii = container_of(entry, struct root_item_info, cache_extent);
13586         ASSERT(rii->cache_extent.start == root_id);
13587         ASSERT(rii->cache_extent.size == 1);
13588
13589         if (rii->node_count != 1) {
13590                 fprintf(stderr,
13591                         "Error: could not find btree root extent for root %llu\n",
13592                         root_id);
13593                 return -ENOENT;
13594         }
13595
13596         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13597         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13598
13599         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13600             btrfs_root_level(&ri) != rii->level ||
13601             btrfs_root_generation(&ri) != rii->gen) {
13602
13603                 /*
13604                  * If we're in repair mode but our caller told us to not update
13605                  * the root item, i.e. just check if it needs to be updated, don't
13606                  * print this message, since the caller will call us again shortly
13607                  * for the same root item without read only mode (the caller will
13608                  * open a transaction first).
13609                  */
13610                 if (!(read_only_mode && repair))
13611                         fprintf(stderr,
13612                                 "%sroot item for root %llu,"
13613                                 " current bytenr %llu, current gen %llu, current level %u,"
13614                                 " new bytenr %llu, new gen %llu, new level %u\n",
13615                                 (read_only_mode ? "" : "fixing "),
13616                                 root_id,
13617                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13618                                 btrfs_root_level(&ri),
13619                                 rii->bytenr, rii->gen, rii->level);
13620
13621                 if (btrfs_root_generation(&ri) > rii->gen) {
13622                         fprintf(stderr,
13623                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13624                                 root_id, btrfs_root_generation(&ri), rii->gen);
13625                         return -EINVAL;
13626                 }
13627
13628                 if (!read_only_mode) {
13629                         btrfs_set_root_bytenr(&ri, rii->bytenr);
13630                         btrfs_set_root_level(&ri, rii->level);
13631                         btrfs_set_root_generation(&ri, rii->gen);
13632                         write_extent_buffer(path->nodes[0], &ri,
13633                                             offset, sizeof(ri));
13634                 }
13635
13636                 return 1;
13637         }
13638
13639         return 0;
13640 }
13641
13642 /*
13643  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13644  * caused read-only snapshots to be corrupted if they were created at a moment
13645  * when the source subvolume/snapshot had orphan items. The issue was that the
13646  * on-disk root items became incorrect, referring to the pre orphan cleanup root
13647  * node instead of the post orphan cleanup root node.
13648  * So this function, and its callees, just detects and fixes those cases. Even
13649  * though the regression was for read-only snapshots, this function applies to
13650  * any snapshot/subvolume root.
13651  * This must be run before any other repair code - not doing it so, makes other
13652  * repair code delete or modify backrefs in the extent tree for example, which
13653  * will result in an inconsistent fs after repairing the root items.
13654  */
13655 static int repair_root_items(struct btrfs_fs_info *info)
13656 {
13657         struct btrfs_path path;
13658         struct btrfs_key key;
13659         struct extent_buffer *leaf;
13660         struct btrfs_trans_handle *trans = NULL;
13661         int ret = 0;
13662         int bad_roots = 0;
13663         int need_trans = 0;
13664
13665         btrfs_init_path(&path);
13666
13667         ret = build_roots_info_cache(info);
13668         if (ret)
13669                 goto out;
13670
13671         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13672         key.type = BTRFS_ROOT_ITEM_KEY;
13673         key.offset = 0;
13674
13675 again:
13676         /*
13677          * Avoid opening and committing transactions if a leaf doesn't have
13678          * any root items that need to be fixed, so that we avoid rotating
13679          * backup roots unnecessarily.
13680          */
13681         if (need_trans) {
13682                 trans = btrfs_start_transaction(info->tree_root, 1);
13683                 if (IS_ERR(trans)) {
13684                         ret = PTR_ERR(trans);
13685                         goto out;
13686                 }
13687         }
13688
13689         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13690                                 0, trans ? 1 : 0);
13691         if (ret < 0)
13692                 goto out;
13693         leaf = path.nodes[0];
13694
13695         while (1) {
13696                 struct btrfs_key found_key;
13697
13698                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13699                         int no_more_keys = find_next_key(&path, &key);
13700
13701                         btrfs_release_path(&path);
13702                         if (trans) {
13703                                 ret = btrfs_commit_transaction(trans,
13704                                                                info->tree_root);
13705                                 trans = NULL;
13706                                 if (ret < 0)
13707                                         goto out;
13708                         }
13709                         need_trans = 0;
13710                         if (no_more_keys)
13711                                 break;
13712                         goto again;
13713                 }
13714
13715                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13716
13717                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13718                         goto next;
13719                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13720                         goto next;
13721
13722                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13723                 if (ret < 0)
13724                         goto out;
13725                 if (ret) {
13726                         if (!trans && repair) {
13727                                 need_trans = 1;
13728                                 key = found_key;
13729                                 btrfs_release_path(&path);
13730                                 goto again;
13731                         }
13732                         bad_roots++;
13733                 }
13734 next:
13735                 path.slots[0]++;
13736         }
13737         ret = 0;
13738 out:
13739         free_roots_info_cache();
13740         btrfs_release_path(&path);
13741         if (trans)
13742                 btrfs_commit_transaction(trans, info->tree_root);
13743         if (ret < 0)
13744                 return ret;
13745
13746         return bad_roots;
13747 }
13748
13749 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13750 {
13751         struct btrfs_trans_handle *trans;
13752         struct btrfs_block_group_cache *bg_cache;
13753         u64 current = 0;
13754         int ret = 0;
13755
13756         /* Clear all free space cache inodes and its extent data */
13757         while (1) {
13758                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13759                 if (!bg_cache)
13760                         break;
13761                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13762                 if (ret < 0)
13763                         return ret;
13764                 current = bg_cache->key.objectid + bg_cache->key.offset;
13765         }
13766
13767         /* Don't forget to set cache_generation to -1 */
13768         trans = btrfs_start_transaction(fs_info->tree_root, 0);
13769         if (IS_ERR(trans)) {
13770                 error("failed to update super block cache generation");
13771                 return PTR_ERR(trans);
13772         }
13773         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13774         btrfs_commit_transaction(trans, fs_info->tree_root);
13775
13776         return ret;
13777 }
13778
13779 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13780                 int clear_version)
13781 {
13782         int ret = 0;
13783
13784         if (clear_version == 1) {
13785                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13786                         error(
13787                 "free space cache v2 detected, use --clear-space-cache v2");
13788                         ret = 1;
13789                         goto close_out;
13790                 }
13791                 printf("Clearing free space cache\n");
13792                 ret = clear_free_space_cache(fs_info);
13793                 if (ret) {
13794                         error("failed to clear free space cache");
13795                         ret = 1;
13796                 } else {
13797                         printf("Free space cache cleared\n");
13798                 }
13799         } else if (clear_version == 2) {
13800                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13801                         printf("no free space cache v2 to clear\n");
13802                         ret = 0;
13803                         goto close_out;
13804                 }
13805                 printf("Clear free space cache v2\n");
13806                 ret = btrfs_clear_free_space_tree(fs_info);
13807                 if (ret) {
13808                         error("failed to clear free space cache v2: %d", ret);
13809                         ret = 1;
13810                 } else {
13811                         printf("free space cache v2 cleared\n");
13812                 }
13813         }
13814 close_out:
13815         return ret;
13816 }
13817
/*
 * Help text of "btrfs check", one output line per entry, terminated by NULL.
 * cmd_check() hands this table to usage().
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	/* Options; keep in sync with the getopt tables in cmd_check(). */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--force                     skip mount checks, repair is not possible",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
13849
13850 int cmd_check(int argc, char **argv)
13851 {
13852         struct cache_tree root_cache;
13853         struct btrfs_root *root;
13854         struct btrfs_fs_info *info;
13855         u64 bytenr = 0;
13856         u64 subvolid = 0;
13857         u64 tree_root_bytenr = 0;
13858         u64 chunk_root_bytenr = 0;
13859         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13860         int ret = 0;
13861         int err = 0;
13862         u64 num;
13863         int init_csum_tree = 0;
13864         int readonly = 0;
13865         int clear_space_cache = 0;
13866         int qgroup_report = 0;
13867         int qgroups_repaired = 0;
13868         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13869         int force = 0;
13870
13871         while(1) {
13872                 int c;
13873                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13874                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13875                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13876                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13877                         GETOPT_VAL_FORCE };
13878                 static const struct option long_options[] = {
13879                         { "super", required_argument, NULL, 's' },
13880                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13881                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13882                         { "init-csum-tree", no_argument, NULL,
13883                                 GETOPT_VAL_INIT_CSUM },
13884                         { "init-extent-tree", no_argument, NULL,
13885                                 GETOPT_VAL_INIT_EXTENT },
13886                         { "check-data-csum", no_argument, NULL,
13887                                 GETOPT_VAL_CHECK_CSUM },
13888                         { "backup", no_argument, NULL, 'b' },
13889                         { "subvol-extents", required_argument, NULL, 'E' },
13890                         { "qgroup-report", no_argument, NULL, 'Q' },
13891                         { "tree-root", required_argument, NULL, 'r' },
13892                         { "chunk-root", required_argument, NULL,
13893                                 GETOPT_VAL_CHUNK_TREE },
13894                         { "progress", no_argument, NULL, 'p' },
13895                         { "mode", required_argument, NULL,
13896                                 GETOPT_VAL_MODE },
13897                         { "clear-space-cache", required_argument, NULL,
13898                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
13899                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13900                         { NULL, 0, NULL, 0}
13901                 };
13902
13903                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13904                 if (c < 0)
13905                         break;
13906                 switch(c) {
13907                         case 'a': /* ignored */ break;
13908                         case 'b':
13909                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13910                                 break;
13911                         case 's':
13912                                 num = arg_strtou64(optarg);
13913                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13914                                         error(
13915                                         "super mirror should be less than %d",
13916                                                 BTRFS_SUPER_MIRROR_MAX);
13917                                         exit(1);
13918                                 }
13919                                 bytenr = btrfs_sb_offset(((int)num));
13920                                 printf("using SB copy %llu, bytenr %llu\n", num,
13921                                        (unsigned long long)bytenr);
13922                                 break;
13923                         case 'Q':
13924                                 qgroup_report = 1;
13925                                 break;
13926                         case 'E':
13927                                 subvolid = arg_strtou64(optarg);
13928                                 break;
13929                         case 'r':
13930                                 tree_root_bytenr = arg_strtou64(optarg);
13931                                 break;
13932                         case GETOPT_VAL_CHUNK_TREE:
13933                                 chunk_root_bytenr = arg_strtou64(optarg);
13934                                 break;
13935                         case 'p':
13936                                 ctx.progress_enabled = true;
13937                                 break;
13938                         case '?':
13939                         case 'h':
13940                                 usage(cmd_check_usage);
13941                         case GETOPT_VAL_REPAIR:
13942                                 printf("enabling repair mode\n");
13943                                 repair = 1;
13944                                 ctree_flags |= OPEN_CTREE_WRITES;
13945                                 break;
13946                         case GETOPT_VAL_READONLY:
13947                                 readonly = 1;
13948                                 break;
13949                         case GETOPT_VAL_INIT_CSUM:
13950                                 printf("Creating a new CRC tree\n");
13951                                 init_csum_tree = 1;
13952                                 repair = 1;
13953                                 ctree_flags |= OPEN_CTREE_WRITES;
13954                                 break;
13955                         case GETOPT_VAL_INIT_EXTENT:
13956                                 init_extent_tree = 1;
13957                                 ctree_flags |= (OPEN_CTREE_WRITES |
13958                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
13959                                 repair = 1;
13960                                 break;
13961                         case GETOPT_VAL_CHECK_CSUM:
13962                                 check_data_csum = 1;
13963                                 break;
13964                         case GETOPT_VAL_MODE:
13965                                 check_mode = parse_check_mode(optarg);
13966                                 if (check_mode == CHECK_MODE_UNKNOWN) {
13967                                         error("unknown mode: %s", optarg);
13968                                         exit(1);
13969                                 }
13970                                 break;
13971                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
13972                                 if (strcmp(optarg, "v1") == 0) {
13973                                         clear_space_cache = 1;
13974                                 } else if (strcmp(optarg, "v2") == 0) {
13975                                         clear_space_cache = 2;
13976                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13977                                 } else {
13978                                         error(
13979                 "invalid argument to --clear-space-cache, must be v1 or v2");
13980                                         exit(1);
13981                                 }
13982                                 ctree_flags |= OPEN_CTREE_WRITES;
13983                                 break;
13984                         case GETOPT_VAL_FORCE:
13985                                 force = 1;
13986                                 break;
13987                 }
13988         }
13989
13990         if (check_argc_exact(argc - optind, 1))
13991                 usage(cmd_check_usage);
13992
13993         if (ctx.progress_enabled) {
13994                 ctx.tp = TASK_NOTHING;
13995                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13996         }
13997
13998         /* This check is the only reason for --readonly to exist */
13999         if (readonly && repair) {
14000                 error("repair options are not compatible with --readonly");
14001                 exit(1);
14002         }
14003
14004         /*
14005          * experimental and dangerous
14006          */
14007         if (repair && check_mode == CHECK_MODE_LOWMEM)
14008                 warning("low-memory mode repair support is only partial");
14009
14010         radix_tree_init();
14011         cache_tree_init(&root_cache);
14012
14013         ret = check_mounted(argv[optind]);
14014         if (!force) {
14015                 if (ret < 0) {
14016                         error("could not check mount status: %s",
14017                                         strerror(-ret));
14018                         err |= !!ret;
14019                         goto err_out;
14020                 } else if (ret) {
14021                         error(
14022 "%s is currently mounted, use --force if you really intend to check the filesystem",
14023                                 argv[optind]);
14024                         ret = -EBUSY;
14025                         err |= !!ret;
14026                         goto err_out;
14027                 }
14028         } else {
14029                 if (repair) {
14030                         error("repair and --force is not yet supported");
14031                         ret = 1;
14032                         err |= !!ret;
14033                         goto err_out;
14034                 }
14035                 if (ret < 0) {
14036                         warning(
14037 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14038                                 argv[optind]);
14039                 } else if (ret) {
14040                         warning(
14041                         "filesystem mounted, continuing because of --force");
14042                 }
14043                 /* A block device is mounted in exclusive mode by kernel */
14044                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14045         }
14046
14047         /* only allow partial opening under repair mode */
14048         if (repair)
14049                 ctree_flags |= OPEN_CTREE_PARTIAL;
14050
14051         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14052                                   chunk_root_bytenr, ctree_flags);
14053         if (!info) {
14054                 error("cannot open file system");
14055                 ret = -EIO;
14056                 err |= !!ret;
14057                 goto err_out;
14058         }
14059
14060         global_info = info;
14061         root = info->fs_root;
14062         uuid_unparse(info->super_copy->fsid, uuidbuf);
14063
14064         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14065
14066         /*
14067          * Check the bare minimum before starting anything else that could rely
14068          * on it, namely the tree roots, any local consistency checks
14069          */
14070         if (!extent_buffer_uptodate(info->tree_root->node) ||
14071             !extent_buffer_uptodate(info->dev_root->node) ||
14072             !extent_buffer_uptodate(info->chunk_root->node)) {
14073                 error("critical roots corrupted, unable to check the filesystem");
14074                 err |= !!ret;
14075                 ret = -EIO;
14076                 goto close_out;
14077         }
14078
14079         if (clear_space_cache) {
14080                 ret = do_clear_free_space_cache(info, clear_space_cache);
14081                 err |= !!ret;
14082                 goto close_out;
14083         }
14084
14085         /*
14086          * repair mode will force us to commit transaction which
14087          * will make us fail to load log tree when mounting.
14088          */
14089         if (repair && btrfs_super_log_root(info->super_copy)) {
14090                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14091                 if (!ret) {
14092                         ret = 1;
14093                         err |= !!ret;
14094                         goto close_out;
14095                 }
14096                 ret = zero_log_tree(root);
14097                 err |= !!ret;
14098                 if (ret) {
14099                         error("failed to zero log tree: %d", ret);
14100                         goto close_out;
14101                 }
14102         }
14103
14104         if (qgroup_report) {
14105                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14106                        uuidbuf);
14107                 ret = qgroup_verify_all(info);
14108                 err |= !!ret;
14109                 if (ret == 0)
14110                         report_qgroups(1);
14111                 goto close_out;
14112         }
14113         if (subvolid) {
14114                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14115                        subvolid, argv[optind], uuidbuf);
14116                 ret = print_extent_state(info, subvolid);
14117                 err |= !!ret;
14118                 goto close_out;
14119         }
14120
14121         if (init_extent_tree || init_csum_tree) {
14122                 struct btrfs_trans_handle *trans;
14123
14124                 trans = btrfs_start_transaction(info->extent_root, 0);
14125                 if (IS_ERR(trans)) {
14126                         error("error starting transaction");
14127                         ret = PTR_ERR(trans);
14128                         err |= !!ret;
14129                         goto close_out;
14130                 }
14131
14132                 if (init_extent_tree) {
14133                         printf("Creating a new extent tree\n");
14134                         ret = reinit_extent_tree(trans, info);
14135                         err |= !!ret;
14136                         if (ret)
14137                                 goto close_out;
14138                 }
14139
14140                 if (init_csum_tree) {
14141                         printf("Reinitialize checksum tree\n");
14142                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14143                         if (ret) {
14144                                 error("checksum tree initialization failed: %d",
14145                                                 ret);
14146                                 ret = -EIO;
14147                                 err |= !!ret;
14148                                 goto close_out;
14149                         }
14150
14151                         ret = fill_csum_tree(trans, info->csum_root,
14152                                              init_extent_tree);
14153                         err |= !!ret;
14154                         if (ret) {
14155                                 error("checksum tree refilling failed: %d", ret);
14156                                 return -EIO;
14157                         }
14158                 }
14159                 /*
14160                  * Ok now we commit and run the normal fsck, which will add
14161                  * extent entries for all of the items it finds.
14162                  */
14163                 ret = btrfs_commit_transaction(trans, info->extent_root);
14164                 err |= !!ret;
14165                 if (ret)
14166                         goto close_out;
14167         }
14168         if (!extent_buffer_uptodate(info->extent_root->node)) {
14169                 error("critical: extent_root, unable to check the filesystem");
14170                 ret = -EIO;
14171                 err |= !!ret;
14172                 goto close_out;
14173         }
14174         if (!extent_buffer_uptodate(info->csum_root->node)) {
14175                 error("critical: csum_root, unable to check the filesystem");
14176                 ret = -EIO;
14177                 err |= !!ret;
14178                 goto close_out;
14179         }
14180
14181         ret = do_check_chunks_and_extents(info);
14182         err |= !!ret;
14183         if (ret)
14184                 error(
14185                 "errors found in extent allocation tree or chunk allocation");
14186
14187         ret = repair_root_items(info);
14188         err |= !!ret;
14189         if (ret < 0) {
14190                 error("failed to repair root items: %s", strerror(-ret));
14191                 goto close_out;
14192         }
14193         if (repair) {
14194                 fprintf(stderr, "Fixed %d roots.\n", ret);
14195                 ret = 0;
14196         } else if (ret > 0) {
14197                 fprintf(stderr,
14198                        "Found %d roots with an outdated root item.\n",
14199                        ret);
14200                 fprintf(stderr,
14201                         "Please run a filesystem check with the option --repair to fix them.\n");
14202                 ret = 1;
14203                 err |= !!ret;
14204                 goto close_out;
14205         }
14206
14207         if (!ctx.progress_enabled) {
14208                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14209                         fprintf(stderr, "checking free space tree\n");
14210                 else
14211                         fprintf(stderr, "checking free space cache\n");
14212         }
14213         ret = check_space_cache(root);
14214         err |= !!ret;
14215         if (ret) {
14216                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14217                         error("errors found in free space tree");
14218                 else
14219                         error("errors found in free space cache");
14220                 goto out;
14221         }
14222
14223         /*
14224          * We used to have to have these hole extents in between our real
14225          * extents so if we don't have this flag set we need to make sure there
14226          * are no gaps in the file extents for inodes, otherwise we can just
14227          * ignore it when this happens.
14228          */
14229         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14230         ret = do_check_fs_roots(info, &root_cache);
14231         err |= !!ret;
14232         if (ret) {
14233                 error("errors found in fs roots");
14234                 goto out;
14235         }
14236
14237         fprintf(stderr, "checking csums\n");
14238         ret = check_csums(root);
14239         err |= !!ret;
14240         if (ret) {
14241                 error("errors found in csum tree");
14242                 goto out;
14243         }
14244
14245         fprintf(stderr, "checking root refs\n");
14246         /* For low memory mode, check_fs_roots_v2 handles root refs */
14247         if (check_mode != CHECK_MODE_LOWMEM) {
14248                 ret = check_root_refs(root, &root_cache);
14249                 err |= !!ret;
14250                 if (ret) {
14251                         error("errors found in root refs");
14252                         goto out;
14253                 }
14254         }
14255
14256         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14257                 struct extent_buffer *eb;
14258
14259                 eb = list_first_entry(&root->fs_info->recow_ebs,
14260                                       struct extent_buffer, recow);
14261                 list_del_init(&eb->recow);
14262                 ret = recow_extent_buffer(root, eb);
14263                 err |= !!ret;
14264                 if (ret) {
14265                         error("fails to fix transid errors");
14266                         break;
14267                 }
14268         }
14269
14270         while (!list_empty(&delete_items)) {
14271                 struct bad_item *bad;
14272
14273                 bad = list_first_entry(&delete_items, struct bad_item, list);
14274                 list_del_init(&bad->list);
14275                 if (repair) {
14276                         ret = delete_bad_item(root, bad);
14277                         err |= !!ret;
14278                 }
14279                 free(bad);
14280         }
14281
14282         if (info->quota_enabled) {
14283                 fprintf(stderr, "checking quota groups\n");
14284                 ret = qgroup_verify_all(info);
14285                 err |= !!ret;
14286                 if (ret) {
14287                         error("failed to check quota groups");
14288                         goto out;
14289                 }
14290                 report_qgroups(0);
14291                 ret = repair_qgroups(info, &qgroups_repaired);
14292                 err |= !!ret;
14293                 if (err) {
14294                         error("failed to repair quota groups");
14295                         goto out;
14296                 }
14297                 ret = 0;
14298         }
14299
14300         if (!list_empty(&root->fs_info->recow_ebs)) {
14301                 error("transid errors in file system");
14302                 ret = 1;
14303                 err |= !!ret;
14304         }
14305 out:
14306         printf("found %llu bytes used, ",
14307                (unsigned long long)bytes_used);
14308         if (err)
14309                 printf("error(s) found\n");
14310         else
14311                 printf("no error found\n");
14312         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14313         printf("total tree bytes: %llu\n",
14314                (unsigned long long)total_btree_bytes);
14315         printf("total fs tree bytes: %llu\n",
14316                (unsigned long long)total_fs_tree_bytes);
14317         printf("total extent tree bytes: %llu\n",
14318                (unsigned long long)total_extent_tree_bytes);
14319         printf("btree space waste bytes: %llu\n",
14320                (unsigned long long)btree_space_waste);
14321         printf("file data blocks allocated: %llu\n referenced %llu\n",
14322                 (unsigned long long)data_bytes_allocated,
14323                 (unsigned long long)data_bytes_referenced);
14324
14325         free_qgroup_counts();
14326         free_root_recs_tree(&root_cache);
14327 close_out:
14328         close_ctree(root);
14329 err_out:
14330         if (ctx.progress_enabled)
14331                 task_deinit(ctx.info);
14332
14333         return err;
14334 }