btrfs-progs: check: repair inode orphan item in lowmem mode
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases the progress indicator can report.
 *
 * The value is used as an index into task_position_string[] in
 * print_status_check(), which has one label for every phase except
 * TASK_NOTHING.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* must stay last; print_status_check() has no label for it */
};
53
/* State handed to print_status_check() through its void * argument. */
struct task_ctx {
        /* NOTE(review): not read in this part of the file; presumably a progress on/off switch */
        int progress_enabled;
        /* phase whose label print_status_check() prints */
        enum task_position tp;

        /* handle passed to task_period_start() and task_period_wait() */
        struct task_info *info;
};
60
/*
 * File-scope state of the check command.
 *
 * NOTE(review): none of these variables are read or written in the part of
 * the file reviewed here.  Judging by the names they are running byte
 * counters, pending work lists and option flags -- confirm against their
 * users further down before relying on that.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* zero-initialized progress context, of the type print_status_check() expects */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Which check implementation to run.
 *
 * parse_check_mode() maps "orig" and "original" to CHECK_MODE_ORIGINAL,
 * "lowmem" to CHECK_MODE_LOWMEM and every other string to
 * CHECK_MODE_UNKNOWN.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Selected mode; starts out as CHECK_MODE_DEFAULT, i.e. the original mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header embedded as the "node" member of both struct data_backref
 * and struct tree_backref.  The rb_node links it into an rb tree ordered
 * by compare_extent_backref().
 */
struct extent_backref {
        struct rb_node node;
        /* 1: header of a data_backref, 0: header of a tree_backref */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        /* when set on a data backref, comparison stops after the parent/root field */
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
/*
 * Back reference of a data extent.
 *
 * The fields parent/root, owner, offset, disk_bytenr and bytes form the
 * sort key used by compare_data_backref().
 */
struct data_backref {
        /* common header; to_data_backref() resolves the container from it */
        struct extent_backref node;
        /* the two names share storage, so comparing one compares both */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /* a counter of its own, distinct from the node.found_ref bit */
        u32 found_ref;
};
115
/*
 * Error bits with DIR_ITEM / INODE_ITEM / ROOT_REF granularity.  Bit 0 is
 * not used by this set.
 *
 * NOTE(review): no user of these bits is visible in the part of the file
 * reviewed here -- presumably the low memory mode fs tree check; confirm.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but does not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but does not match */
133
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
135 {
136         return container_of(back, struct data_backref, node);
137 }
138
139 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
140 {
141         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
142         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
143         struct data_backref *back1 = to_data_backref(ext1);
144         struct data_backref *back2 = to_data_backref(ext2);
145
146         WARN_ON(!ext1->is_data);
147         WARN_ON(!ext2->is_data);
148
149         /* parent and root are a union, so this covers both */
150         if (back1->parent > back2->parent)
151                 return 1;
152         if (back1->parent < back2->parent)
153                 return -1;
154
155         /* This is a full backref and the parents match. */
156         if (back1->node.full_backref)
157                 return 0;
158
159         if (back1->owner > back2->owner)
160                 return 1;
161         if (back1->owner < back2->owner)
162                 return -1;
163
164         if (back1->offset > back2->offset)
165                 return 1;
166         if (back1->offset < back2->offset)
167                 return -1;
168
169         if (back1->found_ref && back2->found_ref) {
170                 if (back1->disk_bytenr > back2->disk_bytenr)
171                         return 1;
172                 if (back1->disk_bytenr < back2->disk_bytenr)
173                         return -1;
174
175                 if (back1->bytes > back2->bytes)
176                         return 1;
177                 if (back1->bytes < back2->bytes)
178                         return -1;
179         }
180
181         return 0;
182 }
183
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head instead of an rb_node.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
 *
 * print_orphan_data_extents() reports objectid as the inode number and
 * prints offset, disk_bytenr and disk_len next to it.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
198
/*
 * Back reference of a tree block.  The parent/root union is the only sort
 * key used by compare_tree_backref().
 */
struct tree_backref {
        /* common header; to_tree_backref() resolves the container from it */
        struct extent_backref node;
        /* the two names share storage, so comparing one compares both */
        union {
                u64 parent;
                u64 root;
        };
};
206
207 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
208 {
209         return container_of(back, struct tree_backref, node);
210 }
211
212 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
213 {
214         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
215         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
216         struct tree_backref *back1 = to_tree_backref(ext1);
217         struct tree_backref *back2 = to_tree_backref(ext2);
218
219         WARN_ON(ext1->is_data);
220         WARN_ON(ext2->is_data);
221
222         /* parent and root are a union, so this covers both */
223         if (back1->parent > back2->parent)
224                 return 1;
225         if (back1->parent < back2->parent)
226                 return -1;
227
228         return 0;
229 }
230
231 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
232 {
233         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
234         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
235
236         if (ext1->is_data > ext2->is_data)
237                 return 1;
238
239         if (ext1->is_data < ext2->is_data)
240                 return -1;
241
242         if (ext1->full_backref > ext2->full_backref)
243                 return 1;
244         if (ext1->full_backref < ext2->full_backref)
245                 return -1;
246
247         if (ext1->is_data)
248                 return compare_data_backref(node1, node2);
249         else
250                 return compare_tree_backref(node1, node2);
251 }
252
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That field is two bits wide so it can hold this value besides 0 and 1.
 */
enum { FLAG_UNSET = 2 };

/*
 * Record kept for one extent.
 *
 * NOTE(review): no code in the part of the file reviewed here fills in
 * these fields; the comments below state only what the declarations show.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        /* presumably holds extent_backref nodes ordered by compare_extent_backref() -- confirm */
        struct rb_root backref_tree;
        /* link that to_extent_record() resolves the record from */
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* 0, 1 or FLAG_UNSET */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
283
284 static inline struct extent_record* to_extent_record(struct list_head *entry)
285 {
286         return container_of(entry, struct extent_record, list);
287 }
288
/*
 * One name referring to an inode, linked into inode_record::backrefs.
 *
 * The structure is allocated with the name stored right behind it:
 * clone_inode_rec() copies sizeof(struct inode_backref) + namelen + 1
 * bytes per entry.
 */
struct inode_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        u8 filetype;
        u8 ref_type;
        /* presumably a mask of REF_ERR_* bits -- confirm against the users */
        int errors;
        u64 dir;
        u64 index;
        /* length of name[], not counting the one extra byte allocated after it */
        u16 namelen;
        /* GNU zero-length array marking the start of the trailing name bytes */
        char name[0];
};
302
303 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
304 {
305         return list_entry(entry, struct inode_backref, list);
306 }
307
/*
 * List entry describing one root item.
 *
 * NOTE(review): not referenced in the part of the file reviewed here; the
 * field names suggest values copied out of a root item -- confirm.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        struct btrfs_key drop_key;
};
317
/*
 * Error bits of a single back reference.  print_ref_error() turns a mask
 * of these into text.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
331
/*
 * One hole [start, start + len) in the file extents of an inode, kept in
 * an rb tree (inode_record::holes) ordered by compare_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
337
/*
 * Everything collected about one inode while checking a tree.
 *
 * clone_inode_rec() duplicates a record including its three containers
 * (backrefs, orphan_extents, holes).
 */
struct inode_record {
        /* list of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* mask of I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        /* inode number as printed by print_inode_error() */
        u64 ino;
        u32 nlink;
        u32 imode;
        /* print_inode_error() rounds this up to the sector size for its fallback hole */
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        u64 extent_start;
        u64 extent_end;
        /* rb tree of struct file_extent_hole */
        struct rb_root holes;
        /* list of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* clone_inode_rec() starts every copy with 1; presumably a reference count -- confirm */
        u32 refs;
};
365
/*
 * Error bits stored in inode_record::errors.  print_inode_error() prints
 * one message per bit.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
381
/*
 * One reference to a root, linked into root_record::backrefs.
 *
 * NOTE(review): no code in the part of the file reviewed here fills this
 * in.  The layout mirrors struct inode_backref, so the name presumably
 * follows the structure in the same allocation -- confirm.
 */
struct root_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        /* presumably a mask of REF_ERR_* bits -- confirm */
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        /* GNU zero-length array marking the start of the trailing name bytes */
        char name[0];
};
396
397 static inline struct root_backref* to_root_backref(struct list_head *entry)
398 {
399         return list_entry(entry, struct root_backref, list);
400 }
401
/*
 * Record kept for one root.
 *
 * NOTE(review): not used in the part of the file reviewed here; field
 * meanings beyond the declarations are assumptions.
 */
struct root_record {
        /* presumably a list of struct root_backref -- confirm */
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
409
/*
 * A cache_extent with an opaque pointer attached.
 * NOTE(review): users are outside the part of the file reviewed here.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
414
/*
 * A cache_extent carrying two cache trees and a pointer to an inode record.
 * NOTE(review): users are outside the part of the file reviewed here; the
 * name suggests per-shared-tree-block state -- confirm.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
422
/*
 * Start and size of a block.
 * NOTE(review): users and units are not visible in the part of the file
 * reviewed here.
 */
struct block_info {
        u64 start;
        u32 size;
};
427
/*
 * State of a tree walk.
 * NOTE(review): users are outside the part of the file reviewed here.
 */
struct walk_control {
        struct cache_tree shared;
        /* BTRFS_MAX_LEVEL slots; presumably one per tree level -- confirm */
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
434
/*
 * List entry naming an item by its key and a root id.
 * NOTE(review): users are outside the part of the file reviewed here.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
440
/*
 * List entry describing an extent by bytenr and length, with two counters.
 * NOTE(review): users are outside the part of the file reviewed here.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
448
/*
 * Information gathered about the root node of a tree.
 * NOTE(review): users are outside the part of the file reviewed here;
 * presumably stored through cache_extent in roots_info_cache -- confirm.
 */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
458
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag owns a distinct bit so that the errors stay distinguishable
 * after being OR-ed into one mask.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH; it now has the first free bit, all
 * other values are unchanged.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8) /* Chunk type does not match */
475
476 static void *print_status_check(void *p)
477 {
478         struct task_ctx *priv = p;
479         const char work_indicator[] = { '.', 'o', 'O', 'o' };
480         uint32_t count = 0;
481         static char *task_position_string[] = {
482                 "checking extents",
483                 "checking free space cache",
484                 "checking fs roots",
485         };
486
487         task_period_start(priv->info, 1000 /* 1s */);
488
489         if (priv->tp == TASK_NOTHING)
490                 return NULL;
491
492         while (1) {
493                 printf("%s [%c]\r", task_position_string[priv->tp],
494                                 work_indicator[count % 4]);
495                 count++;
496                 fflush(stdout);
497                 task_period_wait(priv->info);
498         }
499         return NULL;
500 }
501
/*
 * Counterpart of print_status_check(): ends the line the indicator was
 * rewriting.
 *
 * @p: unused
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        int ret = 0;

        printf("\n");
        fflush(stdout);

        return ret;
}
509
510 static enum btrfs_check_mode parse_check_mode(const char *str)
511 {
512         if (strcmp(str, "lowmem") == 0)
513                 return CHECK_MODE_LOWMEM;
514         if (strcmp(str, "orig") == 0)
515                 return CHECK_MODE_ORIGINAL;
516         if (strcmp(str, "original") == 0)
517                 return CHECK_MODE_ORIGINAL;
518
519         return CHECK_MODE_UNKNOWN;
520 }
521
522 /* Compatible function to allow reuse of old codes */
523 static u64 first_extent_gap(struct rb_root *holes)
524 {
525         struct file_extent_hole *hole;
526
527         if (RB_EMPTY_ROOT(holes))
528                 return (u64)-1;
529
530         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
531         return hole->start;
532 }
533
534 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
535 {
536         struct file_extent_hole *hole1;
537         struct file_extent_hole *hole2;
538
539         hole1 = rb_entry(node1, struct file_extent_hole, node);
540         hole2 = rb_entry(node2, struct file_extent_hole, node);
541
542         if (hole1->start > hole2->start)
543                 return -1;
544         if (hole1->start < hole2->start)
545                 return 1;
546         /* Now hole1->start == hole2->start */
547         if (hole1->len >= hole2->len)
548                 /*
549                  * Hole 1 will be merge center
550                  * Same hole will be merged later
551                  */
552                 return -1;
553         /* Hole 2 will be merge center */
554         return 1;
555 }
556
557 /*
558  * Add a hole to the record
559  *
560  * This will do hole merge for copy_file_extent_holes(),
561  * which will ensure there won't be continuous holes.
562  */
563 static int add_file_extent_hole(struct rb_root *holes,
564                                 u64 start, u64 len)
565 {
566         struct file_extent_hole *hole;
567         struct file_extent_hole *prev = NULL;
568         struct file_extent_hole *next = NULL;
569
570         hole = malloc(sizeof(*hole));
571         if (!hole)
572                 return -ENOMEM;
573         hole->start = start;
574         hole->len = len;
575         /* Since compare will not return 0, no -EEXIST will happen */
576         rb_insert(holes, &hole->node, compare_hole);
577
578         /* simple merge with previous hole */
579         if (rb_prev(&hole->node))
580                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
581                                 node);
582         if (prev && prev->start + prev->len >= hole->start) {
583                 hole->len = hole->start + hole->len - prev->start;
584                 hole->start = prev->start;
585                 rb_erase(&prev->node, holes);
586                 free(prev);
587                 prev = NULL;
588         }
589
590         /* iterate merge with next holes */
591         while (1) {
592                 if (!rb_next(&hole->node))
593                         break;
594                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
595                                         node);
596                 if (hole->start + hole->len >= next->start) {
597                         if (hole->start + hole->len <= next->start + next->len)
598                                 hole->len = next->start + next->len -
599                                             hole->start;
600                         rb_erase(&next->node, holes);
601                         free(next);
602                         next = NULL;
603                 } else
604                         break;
605         }
606         return 0;
607 }
608
609 static int compare_hole_range(struct rb_node *node, void *data)
610 {
611         struct file_extent_hole *hole;
612         u64 start;
613
614         hole = (struct file_extent_hole *)data;
615         start = hole->start;
616
617         hole = rb_entry(node, struct file_extent_hole, node);
618         if (start < hole->start)
619                 return -1;
620         if (start >= hole->start && start < hole->start + hole->len)
621                 return 0;
622         return 1;
623 }
624
625 /*
626  * Delete a hole in the record
627  *
628  * This will do the hole split and is much restrict than add.
629  */
630 static int del_file_extent_hole(struct rb_root *holes,
631                                 u64 start, u64 len)
632 {
633         struct file_extent_hole *hole;
634         struct file_extent_hole tmp;
635         u64 prev_start = 0;
636         u64 prev_len = 0;
637         u64 next_start = 0;
638         u64 next_len = 0;
639         struct rb_node *node;
640         int have_prev = 0;
641         int have_next = 0;
642         int ret = 0;
643
644         tmp.start = start;
645         tmp.len = len;
646         node = rb_search(holes, &tmp, compare_hole_range, NULL);
647         if (!node)
648                 return -EEXIST;
649         hole = rb_entry(node, struct file_extent_hole, node);
650         if (start + len > hole->start + hole->len)
651                 return -EEXIST;
652
653         /*
654          * Now there will be no overlap, delete the hole and re-add the
655          * split(s) if they exists.
656          */
657         if (start > hole->start) {
658                 prev_start = hole->start;
659                 prev_len = start - hole->start;
660                 have_prev = 1;
661         }
662         if (hole->start + hole->len > start + len) {
663                 next_start = start + len;
664                 next_len = hole->start + hole->len - start - len;
665                 have_next = 1;
666         }
667         rb_erase(node, holes);
668         free(hole);
669         if (have_prev) {
670                 ret = add_file_extent_hole(holes, prev_start, prev_len);
671                 if (ret < 0)
672                         return ret;
673         }
674         if (have_next) {
675                 ret = add_file_extent_hole(holes, next_start, next_len);
676                 if (ret < 0)
677                         return ret;
678         }
679         return 0;
680 }
681
682 static int copy_file_extent_holes(struct rb_root *dst,
683                                   struct rb_root *src)
684 {
685         struct file_extent_hole *hole;
686         struct rb_node *node;
687         int ret = 0;
688
689         node = rb_first(src);
690         while (node) {
691                 hole = rb_entry(node, struct file_extent_hole, node);
692                 ret = add_file_extent_hole(dst, hole->start, hole->len);
693                 if (ret)
694                         break;
695                 node = rb_next(node);
696         }
697         return ret;
698 }
699
700 static void free_file_extent_holes(struct rb_root *holes)
701 {
702         struct rb_node *node;
703         struct file_extent_hole *hole;
704
705         node = rb_first(holes);
706         while (node) {
707                 hole = rb_entry(node, struct file_extent_hole, node);
708                 rb_erase(node, holes);
709                 free(hole);
710                 node = rb_first(holes);
711         }
712 }
713
/* Forward declaration; the definition lies outside the part of the file reviewed here. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
715
716 static void record_root_in_trans(struct btrfs_trans_handle *trans,
717                                  struct btrfs_root *root)
718 {
719         if (root->last_trans != trans->transid) {
720                 root->track_dirty = 1;
721                 root->last_trans = trans->transid;
722                 root->commit_root = root->node;
723                 extent_buffer_get(root->node);
724         }
725 }
726
727 static u8 imode_to_type(u32 imode)
728 {
729 #define S_SHIFT 12
730         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
731                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
732                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
733                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
734                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
735                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
736                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
737                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
738         };
739
740         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
741 #undef S_SHIFT
742 }
743
744 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
745 {
746         struct device_record *rec1;
747         struct device_record *rec2;
748
749         rec1 = rb_entry(node1, struct device_record, node);
750         rec2 = rb_entry(node2, struct device_record, node);
751         if (rec1->devid > rec2->devid)
752                 return -1;
753         else if (rec1->devid < rec2->devid)
754                 return 1;
755         else
756                 return 0;
757 }
758
759 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
760 {
761         struct inode_record *rec;
762         struct inode_backref *backref;
763         struct inode_backref *orig;
764         struct inode_backref *tmp;
765         struct orphan_data_extent *src_orphan;
766         struct orphan_data_extent *dst_orphan;
767         struct rb_node *rb;
768         size_t size;
769         int ret;
770
771         rec = malloc(sizeof(*rec));
772         if (!rec)
773                 return ERR_PTR(-ENOMEM);
774         memcpy(rec, orig_rec, sizeof(*rec));
775         rec->refs = 1;
776         INIT_LIST_HEAD(&rec->backrefs);
777         INIT_LIST_HEAD(&rec->orphan_extents);
778         rec->holes = RB_ROOT;
779
780         list_for_each_entry(orig, &orig_rec->backrefs, list) {
781                 size = sizeof(*orig) + orig->namelen + 1;
782                 backref = malloc(size);
783                 if (!backref) {
784                         ret = -ENOMEM;
785                         goto cleanup;
786                 }
787                 memcpy(backref, orig, size);
788                 list_add_tail(&backref->list, &rec->backrefs);
789         }
790         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
791                 dst_orphan = malloc(sizeof(*dst_orphan));
792                 if (!dst_orphan) {
793                         ret = -ENOMEM;
794                         goto cleanup;
795                 }
796                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
797                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
798         }
799         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
800         if (ret < 0)
801                 goto cleanup_rb;
802
803         return rec;
804
805 cleanup_rb:
806         rb = rb_first(&rec->holes);
807         while (rb) {
808                 struct file_extent_hole *hole;
809
810                 hole = rb_entry(rb, struct file_extent_hole, node);
811                 rb = rb_next(rb);
812                 free(hole);
813         }
814
815 cleanup:
816         if (!list_empty(&rec->backrefs))
817                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
818                         list_del(&orig->list);
819                         free(orig);
820                 }
821
822         if (!list_empty(&rec->orphan_extents))
823                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
824                         list_del(&orig->list);
825                         free(orig);
826                 }
827
828         free(rec);
829
830         return ERR_PTR(ret);
831 }
832
833 static void print_orphan_data_extents(struct list_head *orphan_extents,
834                                       u64 objectid)
835 {
836         struct orphan_data_extent *orphan;
837
838         if (list_empty(orphan_extents))
839                 return;
840         printf("The following data extent is lost in tree %llu:\n",
841                objectid);
842         list_for_each_entry(orphan, orphan_extents, list) {
843                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
844                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
845                        orphan->disk_len);
846         }
847 }
848
849 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
850 {
851         u64 root_objectid = root->root_key.objectid;
852         int errors = rec->errors;
853
854         if (!errors)
855                 return;
856         /* reloc root errors, we print its corresponding fs root objectid*/
857         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
858                 root_objectid = root->root_key.offset;
859                 fprintf(stderr, "reloc");
860         }
861         fprintf(stderr, "root %llu inode %llu errors %x",
862                 (unsigned long long) root_objectid,
863                 (unsigned long long) rec->ino, rec->errors);
864
865         if (errors & I_ERR_NO_INODE_ITEM)
866                 fprintf(stderr, ", no inode item");
867         if (errors & I_ERR_NO_ORPHAN_ITEM)
868                 fprintf(stderr, ", no orphan item");
869         if (errors & I_ERR_DUP_INODE_ITEM)
870                 fprintf(stderr, ", dup inode item");
871         if (errors & I_ERR_DUP_DIR_INDEX)
872                 fprintf(stderr, ", dup dir index");
873         if (errors & I_ERR_ODD_DIR_ITEM)
874                 fprintf(stderr, ", odd dir item");
875         if (errors & I_ERR_ODD_FILE_EXTENT)
876                 fprintf(stderr, ", odd file extent");
877         if (errors & I_ERR_BAD_FILE_EXTENT)
878                 fprintf(stderr, ", bad file extent");
879         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
880                 fprintf(stderr, ", file extent overlap");
881         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
882                 fprintf(stderr, ", file extent discount");
883         if (errors & I_ERR_DIR_ISIZE_WRONG)
884                 fprintf(stderr, ", dir isize wrong");
885         if (errors & I_ERR_FILE_NBYTES_WRONG)
886                 fprintf(stderr, ", nbytes wrong");
887         if (errors & I_ERR_ODD_CSUM_ITEM)
888                 fprintf(stderr, ", odd csum item");
889         if (errors & I_ERR_SOME_CSUM_MISSING)
890                 fprintf(stderr, ", some csum missing");
891         if (errors & I_ERR_LINK_COUNT_WRONG)
892                 fprintf(stderr, ", link count wrong");
893         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
894                 fprintf(stderr, ", orphan file extent");
895         fprintf(stderr, "\n");
896         /* Print the orphan extents if needed */
897         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
899
900         /* Print the holes if needed */
901         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
902                 struct file_extent_hole *hole;
903                 struct rb_node *node;
904                 int found = 0;
905
906                 node = rb_first(&rec->holes);
907                 fprintf(stderr, "Found file extent holes:\n");
908                 while (node) {
909                         found = 1;
910                         hole = rb_entry(node, struct file_extent_hole, node);
911                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
912                                 hole->start, hole->len);
913                         node = rb_next(node);
914                 }
915                 if (!found)
916                         fprintf(stderr, "\tstart: 0, len: %llu\n",
917                                 round_up(rec->isize,
918                                          root->fs_info->sectorsize));
919         }
920 }
921
922 static void print_ref_error(int errors)
923 {
924         if (errors & REF_ERR_NO_DIR_ITEM)
925                 fprintf(stderr, ", no dir item");
926         if (errors & REF_ERR_NO_DIR_INDEX)
927                 fprintf(stderr, ", no dir index");
928         if (errors & REF_ERR_NO_INODE_REF)
929                 fprintf(stderr, ", no inode ref");
930         if (errors & REF_ERR_DUP_DIR_ITEM)
931                 fprintf(stderr, ", dup dir item");
932         if (errors & REF_ERR_DUP_DIR_INDEX)
933                 fprintf(stderr, ", dup dir index");
934         if (errors & REF_ERR_DUP_INODE_REF)
935                 fprintf(stderr, ", dup inode ref");
936         if (errors & REF_ERR_INDEX_UNMATCH)
937                 fprintf(stderr, ", index mismatch");
938         if (errors & REF_ERR_FILETYPE_UNMATCH)
939                 fprintf(stderr, ", filetype mismatch");
940         if (errors & REF_ERR_NAME_TOO_LONG)
941                 fprintf(stderr, ", name too long");
942         if (errors & REF_ERR_NO_ROOT_REF)
943                 fprintf(stderr, ", no root ref");
944         if (errors & REF_ERR_NO_ROOT_BACKREF)
945                 fprintf(stderr, ", no root backref");
946         if (errors & REF_ERR_DUP_ROOT_REF)
947                 fprintf(stderr, ", dup root ref");
948         if (errors & REF_ERR_DUP_ROOT_BACKREF)
949                 fprintf(stderr, ", dup root backref");
950         fprintf(stderr, "\n");
951 }
952
953 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
954                                           u64 ino, int mod)
955 {
956         struct ptr_node *node;
957         struct cache_extent *cache;
958         struct inode_record *rec = NULL;
959         int ret;
960
961         cache = lookup_cache_extent(inode_cache, ino, 1);
962         if (cache) {
963                 node = container_of(cache, struct ptr_node, cache);
964                 rec = node->data;
965                 if (mod && rec->refs > 1) {
966                         node->data = clone_inode_rec(rec);
967                         if (IS_ERR(node->data))
968                                 return node->data;
969                         rec->refs--;
970                         rec = node->data;
971                 }
972         } else if (mod) {
973                 rec = calloc(1, sizeof(*rec));
974                 if (!rec)
975                         return ERR_PTR(-ENOMEM);
976                 rec->ino = ino;
977                 rec->extent_start = (u64)-1;
978                 rec->refs = 1;
979                 INIT_LIST_HEAD(&rec->backrefs);
980                 INIT_LIST_HEAD(&rec->orphan_extents);
981                 rec->holes = RB_ROOT;
982
983                 node = malloc(sizeof(*node));
984                 if (!node) {
985                         free(rec);
986                         return ERR_PTR(-ENOMEM);
987                 }
988                 node->cache.start = ino;
989                 node->cache.size = 1;
990                 node->data = rec;
991
992                 if (ino == BTRFS_FREE_INO_OBJECTID)
993                         rec->found_link = 1;
994
995                 ret = insert_cache_extent(inode_cache, &node->cache);
996                 if (ret)
997                         return ERR_PTR(-EEXIST);
998         }
999         return rec;
1000 }
1001
1002 static void free_orphan_data_extents(struct list_head *orphan_extents)
1003 {
1004         struct orphan_data_extent *orphan;
1005
1006         while (!list_empty(orphan_extents)) {
1007                 orphan = list_entry(orphan_extents->next,
1008                                     struct orphan_data_extent, list);
1009                 list_del(&orphan->list);
1010                 free(orphan);
1011         }
1012 }
1013
1014 static void free_inode_rec(struct inode_record *rec)
1015 {
1016         struct inode_backref *backref;
1017
1018         if (--rec->refs > 0)
1019                 return;
1020
1021         while (!list_empty(&rec->backrefs)) {
1022                 backref = to_inode_backref(rec->backrefs.next);
1023                 list_del(&backref->list);
1024                 free(backref);
1025         }
1026         free_orphan_data_extents(&rec->orphan_extents);
1027         free_file_extent_holes(&rec->holes);
1028         free(rec);
1029 }
1030
1031 static int can_free_inode_rec(struct inode_record *rec)
1032 {
1033         if (!rec->errors && rec->checked && rec->found_inode_item &&
1034             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1035                 return 1;
1036         return 0;
1037 }
1038
1039 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1040                                  struct inode_record *rec)
1041 {
1042         struct cache_extent *cache;
1043         struct inode_backref *tmp, *backref;
1044         struct ptr_node *node;
1045         u8 filetype;
1046
1047         if (!rec->found_inode_item)
1048                 return;
1049
1050         filetype = imode_to_type(rec->imode);
1051         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1052                 if (backref->found_dir_item && backref->found_dir_index) {
1053                         if (backref->filetype != filetype)
1054                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1055                         if (!backref->errors && backref->found_inode_ref &&
1056                             rec->nlink == rec->found_link) {
1057                                 list_del(&backref->list);
1058                                 free(backref);
1059                         }
1060                 }
1061         }
1062
1063         if (!rec->checked || rec->merging)
1064                 return;
1065
1066         if (S_ISDIR(rec->imode)) {
1067                 if (rec->found_size != rec->isize)
1068                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1069                 if (rec->found_file_extent)
1070                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1071         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1072                 if (rec->found_dir_item)
1073                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1074                 if (rec->found_size != rec->nbytes)
1075                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1076                 if (rec->nlink > 0 && !no_holes &&
1077                     (rec->extent_end < rec->isize ||
1078                      first_extent_gap(&rec->holes) < rec->isize))
1079                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1080         }
1081
1082         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1083                 if (rec->found_csum_item && rec->nodatasum)
1084                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1085                 if (rec->some_csum_missing && !rec->nodatasum)
1086                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1087         }
1088
1089         BUG_ON(rec->refs != 1);
1090         if (can_free_inode_rec(rec)) {
1091                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1092                 node = container_of(cache, struct ptr_node, cache);
1093                 BUG_ON(node->data != rec);
1094                 remove_cache_extent(inode_cache, &node->cache);
1095                 free(node);
1096                 free_inode_rec(rec);
1097         }
1098 }
1099
1100 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1101 {
1102         struct btrfs_path path;
1103         struct btrfs_key key;
1104         int ret;
1105
1106         key.objectid = BTRFS_ORPHAN_OBJECTID;
1107         key.type = BTRFS_ORPHAN_ITEM_KEY;
1108         key.offset = ino;
1109
1110         btrfs_init_path(&path);
1111         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1112         btrfs_release_path(&path);
1113         if (ret > 0)
1114                 ret = -ENOENT;
1115         return ret;
1116 }
1117
1118 static int process_inode_item(struct extent_buffer *eb,
1119                               int slot, struct btrfs_key *key,
1120                               struct shared_node *active_node)
1121 {
1122         struct inode_record *rec;
1123         struct btrfs_inode_item *item;
1124
1125         rec = active_node->current;
1126         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1127         if (rec->found_inode_item) {
1128                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1129                 return 1;
1130         }
1131         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1132         rec->nlink = btrfs_inode_nlink(eb, item);
1133         rec->isize = btrfs_inode_size(eb, item);
1134         rec->nbytes = btrfs_inode_nbytes(eb, item);
1135         rec->imode = btrfs_inode_mode(eb, item);
1136         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1137                 rec->nodatasum = 1;
1138         rec->found_inode_item = 1;
1139         if (rec->nlink == 0)
1140                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1141         maybe_free_inode_rec(&active_node->inode_cache, rec);
1142         return 0;
1143 }
1144
1145 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1146                                                 const char *name,
1147                                                 int namelen, u64 dir)
1148 {
1149         struct inode_backref *backref;
1150
1151         list_for_each_entry(backref, &rec->backrefs, list) {
1152                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1153                         break;
1154                 if (backref->dir != dir || backref->namelen != namelen)
1155                         continue;
1156                 if (memcmp(name, backref->name, namelen))
1157                         continue;
1158                 return backref;
1159         }
1160
1161         backref = malloc(sizeof(*backref) + namelen + 1);
1162         if (!backref)
1163                 return NULL;
1164         memset(backref, 0, sizeof(*backref));
1165         backref->dir = dir;
1166         backref->namelen = namelen;
1167         memcpy(backref->name, name, namelen);
1168         backref->name[namelen] = '\0';
1169         list_add_tail(&backref->list, &rec->backrefs);
1170         return backref;
1171 }
1172
1173 static int add_inode_backref(struct cache_tree *inode_cache,
1174                              u64 ino, u64 dir, u64 index,
1175                              const char *name, int namelen,
1176                              u8 filetype, u8 itemtype, int errors)
1177 {
1178         struct inode_record *rec;
1179         struct inode_backref *backref;
1180
1181         rec = get_inode_rec(inode_cache, ino, 1);
1182         BUG_ON(IS_ERR(rec));
1183         backref = get_inode_backref(rec, name, namelen, dir);
1184         BUG_ON(!backref);
1185         if (errors)
1186                 backref->errors |= errors;
1187         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1188                 if (backref->found_dir_index)
1189                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1190                 if (backref->found_inode_ref && backref->index != index)
1191                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1192                 if (backref->found_dir_item && backref->filetype != filetype)
1193                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1194
1195                 backref->index = index;
1196                 backref->filetype = filetype;
1197                 backref->found_dir_index = 1;
1198         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1199                 rec->found_link++;
1200                 if (backref->found_dir_item)
1201                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1202                 if (backref->found_dir_index && backref->filetype != filetype)
1203                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1204
1205                 backref->filetype = filetype;
1206                 backref->found_dir_item = 1;
1207         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1208                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1209                 if (backref->found_inode_ref)
1210                         backref->errors |= REF_ERR_DUP_INODE_REF;
1211                 if (backref->found_dir_index && backref->index != index)
1212                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1213                 else
1214                         backref->index = index;
1215
1216                 backref->ref_type = itemtype;
1217                 backref->found_inode_ref = 1;
1218         } else {
1219                 BUG_ON(1);
1220         }
1221
1222         maybe_free_inode_rec(inode_cache, rec);
1223         return 0;
1224 }
1225
1226 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1227                             struct cache_tree *dst_cache)
1228 {
1229         struct inode_backref *backref;
1230         u32 dir_count = 0;
1231         int ret = 0;
1232
1233         dst->merging = 1;
1234         list_for_each_entry(backref, &src->backrefs, list) {
1235                 if (backref->found_dir_index) {
1236                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1237                                         backref->index, backref->name,
1238                                         backref->namelen, backref->filetype,
1239                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1240                 }
1241                 if (backref->found_dir_item) {
1242                         dir_count++;
1243                         add_inode_backref(dst_cache, dst->ino,
1244                                         backref->dir, 0, backref->name,
1245                                         backref->namelen, backref->filetype,
1246                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1247                 }
1248                 if (backref->found_inode_ref) {
1249                         add_inode_backref(dst_cache, dst->ino,
1250                                         backref->dir, backref->index,
1251                                         backref->name, backref->namelen, 0,
1252                                         backref->ref_type, backref->errors);
1253                 }
1254         }
1255
1256         if (src->found_dir_item)
1257                 dst->found_dir_item = 1;
1258         if (src->found_file_extent)
1259                 dst->found_file_extent = 1;
1260         if (src->found_csum_item)
1261                 dst->found_csum_item = 1;
1262         if (src->some_csum_missing)
1263                 dst->some_csum_missing = 1;
1264         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1265                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1266                 if (ret < 0)
1267                         return ret;
1268         }
1269
1270         BUG_ON(src->found_link < dir_count);
1271         dst->found_link += src->found_link - dir_count;
1272         dst->found_size += src->found_size;
1273         if (src->extent_start != (u64)-1) {
1274                 if (dst->extent_start == (u64)-1) {
1275                         dst->extent_start = src->extent_start;
1276                         dst->extent_end = src->extent_end;
1277                 } else {
1278                         if (dst->extent_end > src->extent_start)
1279                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1280                         else if (dst->extent_end < src->extent_start) {
1281                                 ret = add_file_extent_hole(&dst->holes,
1282                                         dst->extent_end,
1283                                         src->extent_start - dst->extent_end);
1284                         }
1285                         if (dst->extent_end < src->extent_end)
1286                                 dst->extent_end = src->extent_end;
1287                 }
1288         }
1289
1290         dst->errors |= src->errors;
1291         if (src->found_inode_item) {
1292                 if (!dst->found_inode_item) {
1293                         dst->nlink = src->nlink;
1294                         dst->isize = src->isize;
1295                         dst->nbytes = src->nbytes;
1296                         dst->imode = src->imode;
1297                         dst->nodatasum = src->nodatasum;
1298                         dst->found_inode_item = 1;
1299                 } else {
1300                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1301                 }
1302         }
1303         dst->merging = 0;
1304
1305         return 0;
1306 }
1307
1308 static int splice_shared_node(struct shared_node *src_node,
1309                               struct shared_node *dst_node)
1310 {
1311         struct cache_extent *cache;
1312         struct ptr_node *node, *ins;
1313         struct cache_tree *src, *dst;
1314         struct inode_record *rec, *conflict;
1315         u64 current_ino = 0;
1316         int splice = 0;
1317         int ret;
1318
1319         if (--src_node->refs == 0)
1320                 splice = 1;
1321         if (src_node->current)
1322                 current_ino = src_node->current->ino;
1323
1324         src = &src_node->root_cache;
1325         dst = &dst_node->root_cache;
1326 again:
1327         cache = search_cache_extent(src, 0);
1328         while (cache) {
1329                 node = container_of(cache, struct ptr_node, cache);
1330                 rec = node->data;
1331                 cache = next_cache_extent(cache);
1332
1333                 if (splice) {
1334                         remove_cache_extent(src, &node->cache);
1335                         ins = node;
1336                 } else {
1337                         ins = malloc(sizeof(*ins));
1338                         BUG_ON(!ins);
1339                         ins->cache.start = node->cache.start;
1340                         ins->cache.size = node->cache.size;
1341                         ins->data = rec;
1342                         rec->refs++;
1343                 }
1344                 ret = insert_cache_extent(dst, &ins->cache);
1345                 if (ret == -EEXIST) {
1346                         conflict = get_inode_rec(dst, rec->ino, 1);
1347                         BUG_ON(IS_ERR(conflict));
1348                         merge_inode_recs(rec, conflict, dst);
1349                         if (rec->checked) {
1350                                 conflict->checked = 1;
1351                                 if (dst_node->current == conflict)
1352                                         dst_node->current = NULL;
1353                         }
1354                         maybe_free_inode_rec(dst, conflict);
1355                         free_inode_rec(rec);
1356                         free(ins);
1357                 } else {
1358                         BUG_ON(ret);
1359                 }
1360         }
1361
1362         if (src == &src_node->root_cache) {
1363                 src = &src_node->inode_cache;
1364                 dst = &dst_node->inode_cache;
1365                 goto again;
1366         }
1367
1368         if (current_ino > 0 && (!dst_node->current ||
1369             current_ino > dst_node->current->ino)) {
1370                 if (dst_node->current) {
1371                         dst_node->current->checked = 1;
1372                         maybe_free_inode_rec(dst, dst_node->current);
1373                 }
1374                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1375                 BUG_ON(IS_ERR(dst_node->current));
1376         }
1377         return 0;
1378 }
1379
1380 static void free_inode_ptr(struct cache_extent *cache)
1381 {
1382         struct ptr_node *node;
1383         struct inode_record *rec;
1384
1385         node = container_of(cache, struct ptr_node, cache);
1386         rec = node->data;
1387         free_inode_rec(rec);
1388         free(node);
1389 }
1390
1391 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1392
1393 static struct shared_node *find_shared_node(struct cache_tree *shared,
1394                                             u64 bytenr)
1395 {
1396         struct cache_extent *cache;
1397         struct shared_node *node;
1398
1399         cache = lookup_cache_extent(shared, bytenr, 1);
1400         if (cache) {
1401                 node = container_of(cache, struct shared_node, cache);
1402                 return node;
1403         }
1404         return NULL;
1405 }
1406
1407 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1408 {
1409         int ret;
1410         struct shared_node *node;
1411
1412         node = calloc(1, sizeof(*node));
1413         if (!node)
1414                 return -ENOMEM;
1415         node->cache.start = bytenr;
1416         node->cache.size = 1;
1417         cache_tree_init(&node->root_cache);
1418         cache_tree_init(&node->inode_cache);
1419         node->refs = refs;
1420
1421         ret = insert_cache_extent(shared, &node->cache);
1422
1423         return ret;
1424 }
1425
1426 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1427                              struct walk_control *wc, int level)
1428 {
1429         struct shared_node *node;
1430         struct shared_node *dest;
1431         int ret;
1432
1433         if (level == wc->active_node)
1434                 return 0;
1435
1436         BUG_ON(wc->active_node <= level);
1437         node = find_shared_node(&wc->shared, bytenr);
1438         if (!node) {
1439                 ret = add_shared_node(&wc->shared, bytenr, refs);
1440                 BUG_ON(ret);
1441                 node = find_shared_node(&wc->shared, bytenr);
1442                 wc->nodes[level] = node;
1443                 wc->active_node = level;
1444                 return 0;
1445         }
1446
1447         if (wc->root_level == wc->active_node &&
1448             btrfs_root_refs(&root->root_item) == 0) {
1449                 if (--node->refs == 0) {
1450                         free_inode_recs_tree(&node->root_cache);
1451                         free_inode_recs_tree(&node->inode_cache);
1452                         remove_cache_extent(&wc->shared, &node->cache);
1453                         free(node);
1454                 }
1455                 return 1;
1456         }
1457
1458         dest = wc->nodes[wc->active_node];
1459         splice_shared_node(node, dest);
1460         if (node->refs == 0) {
1461                 remove_cache_extent(&wc->shared, &node->cache);
1462                 free(node);
1463         }
1464         return 1;
1465 }
1466
1467 static int leave_shared_node(struct btrfs_root *root,
1468                              struct walk_control *wc, int level)
1469 {
1470         struct shared_node *node;
1471         struct shared_node *dest;
1472         int i;
1473
1474         if (level == wc->root_level)
1475                 return 0;
1476
1477         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1478                 if (wc->nodes[i])
1479                         break;
1480         }
1481         BUG_ON(i >= BTRFS_MAX_LEVEL);
1482
1483         node = wc->nodes[wc->active_node];
1484         wc->nodes[wc->active_node] = NULL;
1485         wc->active_node = i;
1486
1487         dest = wc->nodes[wc->active_node];
1488         if (wc->active_node < wc->root_level ||
1489             btrfs_root_refs(&root->root_item) > 0) {
1490                 BUG_ON(node->refs <= 1);
1491                 splice_shared_node(node, dest);
1492         } else {
1493                 BUG_ON(node->refs < 2);
1494                 node->refs--;
1495         }
1496         return 0;
1497 }
1498
1499 /*
1500  * Returns:
1501  * < 0 - on error
1502  * 1   - if the root with id child_root_id is a child of root parent_root_id
1503  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1504  *       has other root(s) as parent(s)
1505  * 2   - if the root child_root_id doesn't have any parent roots
1506  */
1507 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1508                          u64 child_root_id)
1509 {
1510         struct btrfs_path path;
1511         struct btrfs_key key;
1512         struct extent_buffer *leaf;
1513         int has_parent = 0;
1514         int ret;
1515
1516         btrfs_init_path(&path);
1517
1518         key.objectid = parent_root_id;
1519         key.type = BTRFS_ROOT_REF_KEY;
1520         key.offset = child_root_id;
1521         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1522                                 0, 0);
1523         if (ret < 0)
1524                 return ret;
1525         btrfs_release_path(&path);
1526         if (!ret)
1527                 return 1;
1528
1529         key.objectid = child_root_id;
1530         key.type = BTRFS_ROOT_BACKREF_KEY;
1531         key.offset = 0;
1532         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1533                                 0, 0);
1534         if (ret < 0)
1535                 goto out;
1536
1537         while (1) {
1538                 leaf = path.nodes[0];
1539                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1540                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1541                         if (ret)
1542                                 break;
1543                         leaf = path.nodes[0];
1544                 }
1545
1546                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1547                 if (key.objectid != child_root_id ||
1548                     key.type != BTRFS_ROOT_BACKREF_KEY)
1549                         break;
1550
1551                 has_parent = 1;
1552
1553                 if (key.offset == parent_root_id) {
1554                         btrfs_release_path(&path);
1555                         return 1;
1556                 }
1557
1558                 path.slots[0]++;
1559         }
1560 out:
1561         btrfs_release_path(&path);
1562         if (ret < 0)
1563                 return ret;
1564         return has_parent ? 0 : 2;
1565 }
1566
1567 static int process_dir_item(struct extent_buffer *eb,
1568                             int slot, struct btrfs_key *key,
1569                             struct shared_node *active_node)
1570 {
1571         u32 total;
1572         u32 cur = 0;
1573         u32 len;
1574         u32 name_len;
1575         u32 data_len;
1576         int error;
1577         int nritems = 0;
1578         u8 filetype;
1579         struct btrfs_dir_item *di;
1580         struct inode_record *rec;
1581         struct cache_tree *root_cache;
1582         struct cache_tree *inode_cache;
1583         struct btrfs_key location;
1584         char namebuf[BTRFS_NAME_LEN];
1585
1586         root_cache = &active_node->root_cache;
1587         inode_cache = &active_node->inode_cache;
1588         rec = active_node->current;
1589         rec->found_dir_item = 1;
1590
1591         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1592         total = btrfs_item_size_nr(eb, slot);
1593         while (cur < total) {
1594                 nritems++;
1595                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1596                 name_len = btrfs_dir_name_len(eb, di);
1597                 data_len = btrfs_dir_data_len(eb, di);
1598                 filetype = btrfs_dir_type(eb, di);
1599
1600                 rec->found_size += name_len;
1601                 if (cur + sizeof(*di) + name_len > total ||
1602                     name_len > BTRFS_NAME_LEN) {
1603                         error = REF_ERR_NAME_TOO_LONG;
1604
1605                         if (cur + sizeof(*di) > total)
1606                                 break;
1607                         len = min_t(u32, total - cur - sizeof(*di),
1608                                     BTRFS_NAME_LEN);
1609                 } else {
1610                         len = name_len;
1611                         error = 0;
1612                 }
1613
1614                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1615
1616                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1617                     key->offset != btrfs_name_hash(namebuf, len)) {
1618                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1619                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1620                         key->objectid, key->offset, namebuf, len, filetype,
1621                         key->offset, btrfs_name_hash(namebuf, len));
1622                 }
1623
1624                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1625                         add_inode_backref(inode_cache, location.objectid,
1626                                           key->objectid, key->offset, namebuf,
1627                                           len, filetype, key->type, error);
1628                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1629                         add_inode_backref(root_cache, location.objectid,
1630                                           key->objectid, key->offset,
1631                                           namebuf, len, filetype,
1632                                           key->type, error);
1633                 } else {
1634                         fprintf(stderr, "invalid location in dir item %u\n",
1635                                 location.type);
1636                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1637                                           key->objectid, key->offset, namebuf,
1638                                           len, filetype, key->type, error);
1639                 }
1640
1641                 len = sizeof(*di) + name_len + data_len;
1642                 di = (struct btrfs_dir_item *)((char *)di + len);
1643                 cur += len;
1644         }
1645         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1646                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1647
1648         return 0;
1649 }
1650
1651 static int process_inode_ref(struct extent_buffer *eb,
1652                              int slot, struct btrfs_key *key,
1653                              struct shared_node *active_node)
1654 {
1655         u32 total;
1656         u32 cur = 0;
1657         u32 len;
1658         u32 name_len;
1659         u64 index;
1660         int error;
1661         struct cache_tree *inode_cache;
1662         struct btrfs_inode_ref *ref;
1663         char namebuf[BTRFS_NAME_LEN];
1664
1665         inode_cache = &active_node->inode_cache;
1666
1667         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1668         total = btrfs_item_size_nr(eb, slot);
1669         while (cur < total) {
1670                 name_len = btrfs_inode_ref_name_len(eb, ref);
1671                 index = btrfs_inode_ref_index(eb, ref);
1672
1673                 /* inode_ref + namelen should not cross item boundary */
1674                 if (cur + sizeof(*ref) + name_len > total ||
1675                     name_len > BTRFS_NAME_LEN) {
1676                         if (total < cur + sizeof(*ref))
1677                                 break;
1678
1679                         /* Still try to read out the remaining part */
1680                         len = min_t(u32, total - cur - sizeof(*ref),
1681                                     BTRFS_NAME_LEN);
1682                         error = REF_ERR_NAME_TOO_LONG;
1683                 } else {
1684                         len = name_len;
1685                         error = 0;
1686                 }
1687
1688                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1689                 add_inode_backref(inode_cache, key->objectid, key->offset,
1690                                   index, namebuf, len, 0, key->type, error);
1691
1692                 len = sizeof(*ref) + name_len;
1693                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1694                 cur += len;
1695         }
1696         return 0;
1697 }
1698
1699 static int process_inode_extref(struct extent_buffer *eb,
1700                                 int slot, struct btrfs_key *key,
1701                                 struct shared_node *active_node)
1702 {
1703         u32 total;
1704         u32 cur = 0;
1705         u32 len;
1706         u32 name_len;
1707         u64 index;
1708         u64 parent;
1709         int error;
1710         struct cache_tree *inode_cache;
1711         struct btrfs_inode_extref *extref;
1712         char namebuf[BTRFS_NAME_LEN];
1713
1714         inode_cache = &active_node->inode_cache;
1715
1716         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1717         total = btrfs_item_size_nr(eb, slot);
1718         while (cur < total) {
1719                 name_len = btrfs_inode_extref_name_len(eb, extref);
1720                 index = btrfs_inode_extref_index(eb, extref);
1721                 parent = btrfs_inode_extref_parent(eb, extref);
1722                 if (name_len <= BTRFS_NAME_LEN) {
1723                         len = name_len;
1724                         error = 0;
1725                 } else {
1726                         len = BTRFS_NAME_LEN;
1727                         error = REF_ERR_NAME_TOO_LONG;
1728                 }
1729                 read_extent_buffer(eb, namebuf,
1730                                    (unsigned long)(extref + 1), len);
1731                 add_inode_backref(inode_cache, key->objectid, parent,
1732                                   index, namebuf, len, 0, key->type, error);
1733
1734                 len = sizeof(*extref) + name_len;
1735                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1736                 cur += len;
1737         }
1738         return 0;
1739
1740 }
1741
1742 static int count_csum_range(struct btrfs_root *root, u64 start,
1743                             u64 len, u64 *found)
1744 {
1745         struct btrfs_key key;
1746         struct btrfs_path path;
1747         struct extent_buffer *leaf;
1748         int ret;
1749         size_t size;
1750         *found = 0;
1751         u64 csum_end;
1752         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1753
1754         btrfs_init_path(&path);
1755
1756         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1757         key.offset = start;
1758         key.type = BTRFS_EXTENT_CSUM_KEY;
1759
1760         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1761                                 &key, &path, 0, 0);
1762         if (ret < 0)
1763                 goto out;
1764         if (ret > 0 && path.slots[0] > 0) {
1765                 leaf = path.nodes[0];
1766                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1767                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1768                     key.type == BTRFS_EXTENT_CSUM_KEY)
1769                         path.slots[0]--;
1770         }
1771
1772         while (len > 0) {
1773                 leaf = path.nodes[0];
1774                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1775                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1776                         if (ret > 0)
1777                                 break;
1778                         else if (ret < 0)
1779                                 goto out;
1780                         leaf = path.nodes[0];
1781                 }
1782
1783                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1784                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1785                     key.type != BTRFS_EXTENT_CSUM_KEY)
1786                         break;
1787
1788                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1789                 if (key.offset >= start + len)
1790                         break;
1791
1792                 if (key.offset > start)
1793                         start = key.offset;
1794
1795                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1796                 csum_end = key.offset + (size / csum_size) *
1797                            root->fs_info->sectorsize;
1798                 if (csum_end > start) {
1799                         size = min(csum_end - start, len);
1800                         len -= size;
1801                         start += size;
1802                         *found += size;
1803                 }
1804
1805                 path.slots[0]++;
1806         }
1807 out:
1808         btrfs_release_path(&path);
1809         if (ret < 0)
1810                 return ret;
1811         return 0;
1812 }
1813
1814 static int process_file_extent(struct btrfs_root *root,
1815                                 struct extent_buffer *eb,
1816                                 int slot, struct btrfs_key *key,
1817                                 struct shared_node *active_node)
1818 {
1819         struct inode_record *rec;
1820         struct btrfs_file_extent_item *fi;
1821         u64 num_bytes = 0;
1822         u64 disk_bytenr = 0;
1823         u64 extent_offset = 0;
1824         u64 mask = root->fs_info->sectorsize - 1;
1825         int extent_type;
1826         int ret;
1827
1828         rec = active_node->current;
1829         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1830         rec->found_file_extent = 1;
1831
1832         if (rec->extent_start == (u64)-1) {
1833                 rec->extent_start = key->offset;
1834                 rec->extent_end = key->offset;
1835         }
1836
1837         if (rec->extent_end > key->offset)
1838                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1839         else if (rec->extent_end < key->offset) {
1840                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1841                                            key->offset - rec->extent_end);
1842                 if (ret < 0)
1843                         return ret;
1844         }
1845
1846         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1847         extent_type = btrfs_file_extent_type(eb, fi);
1848
1849         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1850                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1851                 if (num_bytes == 0)
1852                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1853                 rec->found_size += num_bytes;
1854                 num_bytes = (num_bytes + mask) & ~mask;
1855         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1856                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1857                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1858                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1859                 extent_offset = btrfs_file_extent_offset(eb, fi);
1860                 if (num_bytes == 0 || (num_bytes & mask))
1861                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1862                 if (num_bytes + extent_offset >
1863                     btrfs_file_extent_ram_bytes(eb, fi))
1864                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1865                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1866                     (btrfs_file_extent_compression(eb, fi) ||
1867                      btrfs_file_extent_encryption(eb, fi) ||
1868                      btrfs_file_extent_other_encoding(eb, fi)))
1869                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1870                 if (disk_bytenr > 0)
1871                         rec->found_size += num_bytes;
1872         } else {
1873                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1874         }
1875         rec->extent_end = key->offset + num_bytes;
1876
1877         /*
1878          * The data reloc tree will copy full extents into its inode and then
1879          * copy the corresponding csums.  Because the extent it copied could be
1880          * a preallocated extent that hasn't been written to yet there may be no
1881          * csums to copy, ergo we won't have csums for our file extent.  This is
1882          * ok so just don't bother checking csums if the inode belongs to the
1883          * data reloc tree.
1884          */
1885         if (disk_bytenr > 0 &&
1886             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1887                 u64 found;
1888                 if (btrfs_file_extent_compression(eb, fi))
1889                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1890                 else
1891                         disk_bytenr += extent_offset;
1892
1893                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1894                 if (ret < 0)
1895                         return ret;
1896                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1897                         if (found > 0)
1898                                 rec->found_csum_item = 1;
1899                         if (found < num_bytes)
1900                                 rec->some_csum_missing = 1;
1901                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1902                         if (found > 0)
1903                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1904                 }
1905         }
1906         return 0;
1907 }
1908
1909 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1910                             struct walk_control *wc)
1911 {
1912         struct btrfs_key key;
1913         u32 nritems;
1914         int i;
1915         int ret = 0;
1916         struct cache_tree *inode_cache;
1917         struct shared_node *active_node;
1918
1919         if (wc->root_level == wc->active_node &&
1920             btrfs_root_refs(&root->root_item) == 0)
1921                 return 0;
1922
1923         active_node = wc->nodes[wc->active_node];
1924         inode_cache = &active_node->inode_cache;
1925         nritems = btrfs_header_nritems(eb);
1926         for (i = 0; i < nritems; i++) {
1927                 btrfs_item_key_to_cpu(eb, &key, i);
1928
1929                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1930                         continue;
1931                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1932                         continue;
1933
1934                 if (active_node->current == NULL ||
1935                     active_node->current->ino < key.objectid) {
1936                         if (active_node->current) {
1937                                 active_node->current->checked = 1;
1938                                 maybe_free_inode_rec(inode_cache,
1939                                                      active_node->current);
1940                         }
1941                         active_node->current = get_inode_rec(inode_cache,
1942                                                              key.objectid, 1);
1943                         BUG_ON(IS_ERR(active_node->current));
1944                 }
1945                 switch (key.type) {
1946                 case BTRFS_DIR_ITEM_KEY:
1947                 case BTRFS_DIR_INDEX_KEY:
1948                         ret = process_dir_item(eb, i, &key, active_node);
1949                         break;
1950                 case BTRFS_INODE_REF_KEY:
1951                         ret = process_inode_ref(eb, i, &key, active_node);
1952                         break;
1953                 case BTRFS_INODE_EXTREF_KEY:
1954                         ret = process_inode_extref(eb, i, &key, active_node);
1955                         break;
1956                 case BTRFS_INODE_ITEM_KEY:
1957                         ret = process_inode_item(eb, i, &key, active_node);
1958                         break;
1959                 case BTRFS_EXTENT_DATA_KEY:
1960                         ret = process_file_extent(root, eb, i, &key,
1961                                                   active_node);
1962                         break;
1963                 default:
1964                         break;
1965                 };
1966         }
1967         return ret;
1968 }
1969
/*
 * Per-level cache describing the tree block most recently looked up at each
 * level of the tree being walked.  All three arrays are indexed by tree level.
 */
struct node_refs {
	/* start bytenr of the tree block the cached values belong to */
	u64 bytenr[BTRFS_MAX_LEVEL];
	/* reference count obtained from btrfs_lookup_extent_info() */
	u64 refs[BTRFS_MAX_LEVEL];
	/*
	 * Non-zero when the block has to be checked through the current root;
	 * filled in by update_nodes_refs() only.
	 */
	int need_check[BTRFS_MAX_LEVEL];
};
1975
1976 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1977                              struct node_refs *nrefs, u64 level);
1978 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1979                             unsigned int ext_ref);
1980
1981 /*
1982  * Returns >0  Found error, not fatal, should continue
1983  * Returns <0  Fatal error, must exit the whole check
1984  * Returns 0   No errors found
1985  */
1986 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1987                                struct node_refs *nrefs, int *level, int ext_ref)
1988 {
1989         struct extent_buffer *cur = path->nodes[0];
1990         struct btrfs_key key;
1991         u64 cur_bytenr;
1992         u32 nritems;
1993         u64 first_ino = 0;
1994         int root_level = btrfs_header_level(root->node);
1995         int i;
1996         int ret = 0; /* Final return value */
1997         int err = 0; /* Positive error bitmap */
1998
1999         cur_bytenr = cur->start;
2000
2001         /* skip to first inode item or the first inode number change */
2002         nritems = btrfs_header_nritems(cur);
2003         for (i = 0; i < nritems; i++) {
2004                 btrfs_item_key_to_cpu(cur, &key, i);
2005                 if (i == 0)
2006                         first_ino = key.objectid;
2007                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2008                     (first_ino && first_ino != key.objectid))
2009                         break;
2010         }
2011         if (i == nritems) {
2012                 path->slots[0] = nritems;
2013                 return 0;
2014         }
2015         path->slots[0] = i;
2016
2017 again:
2018         err |= check_inode_item(root, path, ext_ref);
2019
2020         /* modify cur since check_inode_item may change path */
2021         cur = path->nodes[0];
2022
2023         if (err & LAST_ITEM)
2024                 goto out;
2025
2026         /* still have inode items in thie leaf */
2027         if (cur->start == cur_bytenr)
2028                 goto again;
2029
2030         /*
2031          * we have switched to another leaf, above nodes may
2032          * have changed, here walk down the path, if a node
2033          * or leaf is shared, check whether we can skip this
2034          * node or leaf.
2035          */
2036         for (i = root_level; i >= 0; i--) {
2037                 if (path->nodes[i]->start == nrefs->bytenr[i])
2038                         continue;
2039
2040                 ret = update_nodes_refs(root,
2041                                 path->nodes[i]->start,
2042                                 nrefs, i);
2043                 if (ret)
2044                         goto out;
2045
2046                 if (!nrefs->need_check[i]) {
2047                         *level += 1;
2048                         break;
2049                 }
2050         }
2051
2052         for (i = 0; i < *level; i++) {
2053                 free_extent_buffer(path->nodes[i]);
2054                 path->nodes[i] = NULL;
2055         }
2056 out:
2057         err &= ~LAST_ITEM;
2058         if (err && !ret)
2059                 ret = err;
2060         return ret;
2061 }
2062
2063 static void reada_walk_down(struct btrfs_root *root,
2064                             struct extent_buffer *node, int slot)
2065 {
2066         struct btrfs_fs_info *fs_info = root->fs_info;
2067         u64 bytenr;
2068         u64 ptr_gen;
2069         u32 nritems;
2070         int i;
2071         int level;
2072
2073         level = btrfs_header_level(node);
2074         if (level != 1)
2075                 return;
2076
2077         nritems = btrfs_header_nritems(node);
2078         for (i = slot; i < nritems; i++) {
2079                 bytenr = btrfs_node_blockptr(node, i);
2080                 ptr_gen = btrfs_node_ptr_generation(node, i);
2081                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2082         }
2083 }
2084
2085 /*
2086  * Check the child node/leaf by the following condition:
2087  * 1. the first item key of the node/leaf should be the same with the one
2088  *    in parent.
2089  * 2. block in parent node should match the child node/leaf.
2090  * 3. generation of parent node and child's header should be consistent.
2091  *
2092  * Or the child node/leaf pointed by the key in parent is not valid.
2093  *
2094  * We hope to check leaf owner too, but since subvol may share leaves,
2095  * which makes leaf owner check not so strong, key check should be
2096  * sufficient enough for that case.
2097  */
2098 static int check_child_node(struct extent_buffer *parent, int slot,
2099                             struct extent_buffer *child)
2100 {
2101         struct btrfs_key parent_key;
2102         struct btrfs_key child_key;
2103         int ret = 0;
2104
2105         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2106         if (btrfs_header_level(child) == 0)
2107                 btrfs_item_key_to_cpu(child, &child_key, 0);
2108         else
2109                 btrfs_node_key_to_cpu(child, &child_key, 0);
2110
2111         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2112                 ret = -EINVAL;
2113                 fprintf(stderr,
2114                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2115                         parent_key.objectid, parent_key.type, parent_key.offset,
2116                         child_key.objectid, child_key.type, child_key.offset);
2117         }
2118         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2119                 ret = -EINVAL;
2120                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2121                         btrfs_node_blockptr(parent, slot),
2122                         btrfs_header_bytenr(child));
2123         }
2124         if (btrfs_node_ptr_generation(parent, slot) !=
2125             btrfs_header_generation(child)) {
2126                 ret = -EINVAL;
2127                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2128                         btrfs_header_generation(child),
2129                         btrfs_node_ptr_generation(parent, slot));
2130         }
2131         return ret;
2132 }
2133
2134 /*
2135  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2136  * in every fs or file tree check. Here we find its all root ids, and only check
2137  * it in the fs or file tree which has the smallest root id.
2138  */
2139 static int need_check(struct btrfs_root *root, struct ulist *roots)
2140 {
2141         struct rb_node *node;
2142         struct ulist_node *u;
2143
2144         if (roots->nnodes == 1)
2145                 return 1;
2146
2147         node = rb_first(&roots->root);
2148         u = rb_entry(node, struct ulist_node, rb_node);
2149         /*
2150          * current root id is not smallest, we skip it and let it be checked
2151          * in the fs or file tree who hash the smallest root id.
2152          */
2153         if (root->objectid != u->val)
2154                 return 0;
2155
2156         return 1;
2157 }
2158
2159 /*
2160  * for a tree node or leaf, we record its reference count, so later if we still
2161  * process this node or leaf, don't need to compute its reference count again.
2162  */
2163 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2164                              struct node_refs *nrefs, u64 level)
2165 {
2166         int check, ret;
2167         u64 refs;
2168         struct ulist *roots;
2169
2170         if (nrefs->bytenr[level] != bytenr) {
2171                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2172                                        level, 1, &refs, NULL);
2173                 if (ret < 0)
2174                         return ret;
2175
2176                 nrefs->bytenr[level] = bytenr;
2177                 nrefs->refs[level] = refs;
2178                 if (refs > 1) {
2179                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2180                                                    0, &roots);
2181                         if (ret)
2182                                 return -EIO;
2183
2184                         check = need_check(root, roots);
2185                         ulist_free(roots);
2186                         nrefs->need_check[level] = check;
2187                 } else {
2188                         nrefs->need_check[level] = 1;
2189                 }
2190         }
2191
2192         return 0;
2193 }
2194
/*
 * Descend from path->nodes[*level] along path->slots[] until a leaf has been
 * processed or the current node has no slot left.
 *
 * @root:  root being checked
 * @path:  current position; child buffers are installed into it on the way
 *         down
 * @wc:    walk state handed to enter_shared_node() and process_one_leaf()
 * @level: in/out, level of the node the walk is currently at
 * @nrefs: per-level cache of bytenr/refs, used to avoid repeated extent
 *         lookups
 *
 * Returns 0 on success, a negative value on failure (extent lookup for the
 * starting block, unreadable child, child failing check_child_node() or the
 * leaf/node check, or a negative result of process_one_leaf()), or the
 * positive value returned by enter_shared_node() for the starting block.
 * In every case path->slots[*level] is set to the item count of the node at
 * *level before returning.
 */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  struct walk_control *wc, int *level,
			  struct node_refs *nrefs)
{
	enum btrfs_tree_block_status status;
	u64 bytenr;
	u64 ptr_gen;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *next;
	struct extent_buffer *cur;
	int ret, err = 0;
	u64 refs;

	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);

	/* Reuse the cached ref count if @nrefs already describes this block */
	if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
		refs = nrefs->refs[*level];
		ret = 0;
	} else {
		ret = btrfs_lookup_extent_info(NULL, root,
				       path->nodes[*level]->start,
				       *level, 1, &refs, NULL);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		nrefs->bytenr[*level] = path->nodes[*level]->start;
		nrefs->refs[*level] = refs;
	}

	if (refs > 1) {
		ret = enter_shared_node(root, path->nodes[*level]->start,
					refs, wc, *level);
		/*
		 * A positive result ends the walk of this block right here and
		 * is passed on to the caller (presumably the shared block was
		 * already handled; see enter_shared_node()).
		 */
		if (ret > 0) {
			err = ret;
			goto out;
		}
	}

	while (*level >= 0) {
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
		cur = path->nodes[*level];

		if (btrfs_header_level(cur) != *level)
			WARN_ON(1);

		/* No slot left in this node, the caller has to walk up */
		if (path->slots[*level] >= btrfs_header_nritems(cur))
			break;
		if (*level == 0) {
			ret = process_one_leaf(root, cur, wc);
			if (ret < 0)
				err = ret;
			break;
		}
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);

		if (bytenr == nrefs->bytenr[*level - 1]) {
			refs = nrefs->refs[*level - 1];
		} else {
			ret = btrfs_lookup_extent_info(NULL, root, bytenr,
					*level - 1, 1, &refs, NULL);
			/*
			 * A failed lookup for a child is not fatal here: the
			 * child is treated as having no refs, the cache is left
			 * untouched and the descent continues.
			 */
			if (ret < 0) {
				refs = 0;
			} else {
				nrefs->bytenr[*level - 1] = bytenr;
				nrefs->refs[*level - 1] = refs;
			}
		}

		if (refs > 1) {
			ret = enter_shared_node(root, bytenr, refs,
						wc, *level - 1);
			/* Positive result: skip this child, go to next slot */
			if (ret > 0) {
				path->slots[*level]++;
				continue;
			}
		}

		/*
		 * Use the child if it is already available and up to date for
		 * ptr_gen, otherwise drop it and read it from disk.
		 */
		next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
			free_extent_buffer(next);
			reada_walk_down(root, cur, path->slots[*level]);
			next = read_tree_block(root->fs_info, bytenr, ptr_gen);
			if (!extent_buffer_uptodate(next)) {
				struct btrfs_key node_key;

				/* Record the unreadable child before giving up */
				btrfs_node_key_to_cpu(path->nodes[*level],
						      &node_key,
						      path->slots[*level]);
				btrfs_add_corrupt_extent_record(root->fs_info,
						&node_key,
						path->nodes[*level]->start,
						root->fs_info->nodesize,
						*level);
				err = -EIO;
				goto out;
			}
		}

		ret = check_child_node(cur, path->slots[*level], next);
		if (ret) {
			free_extent_buffer(next);
			err = ret;
			goto out;
		}

		if (btrfs_is_leaf(next))
			status = btrfs_check_leaf(root, NULL, next);
		else
			status = btrfs_check_node(root, NULL, next);
		if (status != BTRFS_TREE_BLOCK_CLEAN) {
			free_extent_buffer(next);
			err = -EIO;
			goto out;
		}

		/* Step down: the child replaces whatever the path held there */
		*level = *level - 1;
		free_extent_buffer(path->nodes[*level]);
		path->nodes[*level] = next;
		path->slots[*level] = 0;
	}
out:
	/* Mark the node at *level as fully consumed */
	path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
	return err;
}
2323
2324 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2325                             unsigned int ext_ref);
2326
2327 /*
2328  * Returns >0  Found error, should continue
2329  * Returns <0  Fatal error, must exit the whole check
2330  * Returns 0   No errors found
2331  */
2332 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2333                              int *level, struct node_refs *nrefs, int ext_ref)
2334 {
2335         enum btrfs_tree_block_status status;
2336         u64 bytenr;
2337         u64 ptr_gen;
2338         struct btrfs_fs_info *fs_info = root->fs_info;
2339         struct extent_buffer *next;
2340         struct extent_buffer *cur;
2341         int ret;
2342
2343         WARN_ON(*level < 0);
2344         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2345
2346         ret = update_nodes_refs(root, path->nodes[*level]->start,
2347                                 nrefs, *level);
2348         if (ret < 0)
2349                 return ret;
2350
2351         while (*level >= 0) {
2352                 WARN_ON(*level < 0);
2353                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2354                 cur = path->nodes[*level];
2355
2356                 if (btrfs_header_level(cur) != *level)
2357                         WARN_ON(1);
2358
2359                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2360                         break;
2361                 /* Don't forgot to check leaf/node validation */
2362                 if (*level == 0) {
2363                         ret = btrfs_check_leaf(root, NULL, cur);
2364                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2365                                 ret = -EIO;
2366                                 break;
2367                         }
2368                         ret = process_one_leaf_v2(root, path, nrefs,
2369                                                   level, ext_ref);
2370                         cur = path->nodes[*level];
2371                         break;
2372                 } else {
2373                         ret = btrfs_check_node(root, NULL, cur);
2374                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2375                                 ret = -EIO;
2376                                 break;
2377                         }
2378                 }
2379                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2380                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2381
2382                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2383                 if (ret)
2384                         break;
2385                 if (!nrefs->need_check[*level - 1]) {
2386                         path->slots[*level]++;
2387                         continue;
2388                 }
2389
2390                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2391                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2392                         free_extent_buffer(next);
2393                         reada_walk_down(root, cur, path->slots[*level]);
2394                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2395                         if (!extent_buffer_uptodate(next)) {
2396                                 struct btrfs_key node_key;
2397
2398                                 btrfs_node_key_to_cpu(path->nodes[*level],
2399                                                       &node_key,
2400                                                       path->slots[*level]);
2401                                 btrfs_add_corrupt_extent_record(fs_info,
2402                                                 &node_key,
2403                                                 path->nodes[*level]->start,
2404                                                 fs_info->nodesize,
2405                                                 *level);
2406                                 ret = -EIO;
2407                                 break;
2408                         }
2409                 }
2410
2411                 ret = check_child_node(cur, path->slots[*level], next);
2412                 if (ret < 0) 
2413                         break;
2414
2415                 if (btrfs_is_leaf(next))
2416                         status = btrfs_check_leaf(root, NULL, next);
2417                 else
2418                         status = btrfs_check_node(root, NULL, next);
2419                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2420                         free_extent_buffer(next);
2421                         ret = -EIO;
2422                         break;
2423                 }
2424
2425                 *level = *level - 1;
2426                 free_extent_buffer(path->nodes[*level]);
2427                 path->nodes[*level] = next;
2428                 path->slots[*level] = 0;
2429         }
2430         return ret;
2431 }
2432
2433 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2434                         struct walk_control *wc, int *level)
2435 {
2436         int i;
2437         struct extent_buffer *leaf;
2438
2439         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2440                 leaf = path->nodes[i];
2441                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2442                         path->slots[i]++;
2443                         *level = i;
2444                         return 0;
2445                 } else {
2446                         free_extent_buffer(path->nodes[*level]);
2447                         path->nodes[*level] = NULL;
2448                         BUG_ON(*level > wc->active_node);
2449                         if (*level == wc->active_node)
2450                                 leave_shared_node(root, wc, *level);
2451                         *level = i + 1;
2452                 }
2453         }
2454         return 1;
2455 }
2456
2457 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2458                            int *level)
2459 {
2460         int i;
2461         struct extent_buffer *leaf;
2462
2463         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2464                 leaf = path->nodes[i];
2465                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2466                         path->slots[i]++;
2467                         *level = i;
2468                         return 0;
2469                 } else {
2470                         free_extent_buffer(path->nodes[*level]);
2471                         path->nodes[*level] = NULL;
2472                         *level = i + 1;
2473                 }
2474         }
2475         return 1;
2476 }
2477
2478 static int check_root_dir(struct inode_record *rec)
2479 {
2480         struct inode_backref *backref;
2481         int ret = -1;
2482
2483         if (!rec->found_inode_item || rec->errors)
2484                 goto out;
2485         if (rec->nlink != 1 || rec->found_link != 0)
2486                 goto out;
2487         if (list_empty(&rec->backrefs))
2488                 goto out;
2489         backref = to_inode_backref(rec->backrefs.next);
2490         if (!backref->found_inode_ref)
2491                 goto out;
2492         if (backref->index != 0 || backref->namelen != 2 ||
2493             memcmp(backref->name, "..", 2))
2494                 goto out;
2495         if (backref->found_dir_index || backref->found_dir_item)
2496                 goto out;
2497         ret = 0;
2498 out:
2499         return ret;
2500 }
2501
2502 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2503                               struct btrfs_root *root, struct btrfs_path *path,
2504                               struct inode_record *rec)
2505 {
2506         struct btrfs_inode_item *ei;
2507         struct btrfs_key key;
2508         int ret;
2509
2510         key.objectid = rec->ino;
2511         key.type = BTRFS_INODE_ITEM_KEY;
2512         key.offset = (u64)-1;
2513
2514         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2515         if (ret < 0)
2516                 goto out;
2517         if (ret) {
2518                 if (!path->slots[0]) {
2519                         ret = -ENOENT;
2520                         goto out;
2521                 }
2522                 path->slots[0]--;
2523                 ret = 0;
2524         }
2525         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2526         if (key.objectid != rec->ino) {
2527                 ret = -ENOENT;
2528                 goto out;
2529         }
2530
2531         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2532                             struct btrfs_inode_item);
2533         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2534         btrfs_mark_buffer_dirty(path->nodes[0]);
2535         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2536         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2537                root->root_key.objectid);
2538 out:
2539         btrfs_release_path(path);
2540         return ret;
2541 }
2542
2543 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2544                                     struct btrfs_root *root,
2545                                     struct btrfs_path *path,
2546                                     struct inode_record *rec)
2547 {
2548         int ret;
2549
2550         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2551         btrfs_release_path(path);
2552         if (!ret)
2553                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2554         return ret;
2555 }
2556
2557 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2558                                struct btrfs_root *root,
2559                                struct btrfs_path *path,
2560                                struct inode_record *rec)
2561 {
2562         struct btrfs_inode_item *ei;
2563         struct btrfs_key key;
2564         int ret = 0;
2565
2566         key.objectid = rec->ino;
2567         key.type = BTRFS_INODE_ITEM_KEY;
2568         key.offset = 0;
2569
2570         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2571         if (ret) {
2572                 if (ret > 0)
2573                         ret = -ENOENT;
2574                 goto out;
2575         }
2576
2577         /* Since ret == 0, no need to check anything */
2578         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2579                             struct btrfs_inode_item);
2580         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2581         btrfs_mark_buffer_dirty(path->nodes[0]);
2582         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2583         printf("reset nbytes for ino %llu root %llu\n",
2584                rec->ino, root->root_key.objectid);
2585 out:
2586         btrfs_release_path(path);
2587         return ret;
2588 }
2589
2590 static int add_missing_dir_index(struct btrfs_root *root,
2591                                  struct cache_tree *inode_cache,
2592                                  struct inode_record *rec,
2593                                  struct inode_backref *backref)
2594 {
2595         struct btrfs_path path;
2596         struct btrfs_trans_handle *trans;
2597         struct btrfs_dir_item *dir_item;
2598         struct extent_buffer *leaf;
2599         struct btrfs_key key;
2600         struct btrfs_disk_key disk_key;
2601         struct inode_record *dir_rec;
2602         unsigned long name_ptr;
2603         u32 data_size = sizeof(*dir_item) + backref->namelen;
2604         int ret;
2605
2606         trans = btrfs_start_transaction(root, 1);
2607         if (IS_ERR(trans))
2608                 return PTR_ERR(trans);
2609
2610         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2611                 (unsigned long long)rec->ino);
2612
2613         btrfs_init_path(&path);
2614         key.objectid = backref->dir;
2615         key.type = BTRFS_DIR_INDEX_KEY;
2616         key.offset = backref->index;
2617         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2618         BUG_ON(ret);
2619
2620         leaf = path.nodes[0];
2621         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2622
2623         disk_key.objectid = cpu_to_le64(rec->ino);
2624         disk_key.type = BTRFS_INODE_ITEM_KEY;
2625         disk_key.offset = 0;
2626
2627         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2628         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2629         btrfs_set_dir_data_len(leaf, dir_item, 0);
2630         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2631         name_ptr = (unsigned long)(dir_item + 1);
2632         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2633         btrfs_mark_buffer_dirty(leaf);
2634         btrfs_release_path(&path);
2635         btrfs_commit_transaction(trans, root);
2636
2637         backref->found_dir_index = 1;
2638         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2639         BUG_ON(IS_ERR(dir_rec));
2640         if (!dir_rec)
2641                 return 0;
2642         dir_rec->found_size += backref->namelen;
2643         if (dir_rec->found_size == dir_rec->isize &&
2644             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2645                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2646         if (dir_rec->found_size != dir_rec->isize)
2647                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2648
2649         return 0;
2650 }
2651
2652 static int delete_dir_index(struct btrfs_root *root,
2653                             struct inode_backref *backref)
2654 {
2655         struct btrfs_trans_handle *trans;
2656         struct btrfs_dir_item *di;
2657         struct btrfs_path path;
2658         int ret = 0;
2659
2660         trans = btrfs_start_transaction(root, 1);
2661         if (IS_ERR(trans))
2662                 return PTR_ERR(trans);
2663
2664         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2665                 (unsigned long long)backref->dir,
2666                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2667                 (unsigned long long)root->objectid);
2668
2669         btrfs_init_path(&path);
2670         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2671                                     backref->name, backref->namelen,
2672                                     backref->index, -1);
2673         if (IS_ERR(di)) {
2674                 ret = PTR_ERR(di);
2675                 btrfs_release_path(&path);
2676                 btrfs_commit_transaction(trans, root);
2677                 if (ret == -ENOENT)
2678                         return 0;
2679                 return ret;
2680         }
2681
2682         if (!di)
2683                 ret = btrfs_del_item(trans, root, &path);
2684         else
2685                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2686         BUG_ON(ret);
2687         btrfs_release_path(&path);
2688         btrfs_commit_transaction(trans, root);
2689         return ret;
2690 }
2691
2692 static int create_inode_item(struct btrfs_root *root,
2693                              struct inode_record *rec,
2694                              int root_dir)
2695 {
2696         struct btrfs_trans_handle *trans;
2697         struct btrfs_inode_item inode_item;
2698         time_t now = time(NULL);
2699         int ret;
2700
2701         trans = btrfs_start_transaction(root, 1);
2702         if (IS_ERR(trans)) {
2703                 ret = PTR_ERR(trans);
2704                 return ret;
2705         }
2706
2707         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2708                 "be incomplete, please check permissions and content after "
2709                 "the fsck completes.\n", (unsigned long long)root->objectid,
2710                 (unsigned long long)rec->ino);
2711
2712         memset(&inode_item, 0, sizeof(inode_item));
2713         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2714         if (root_dir)
2715                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2716         else
2717                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2718         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2719         if (rec->found_dir_item) {
2720                 if (rec->found_file_extent)
2721                         fprintf(stderr, "root %llu inode %llu has both a dir "
2722                                 "item and extents, unsure if it is a dir or a "
2723                                 "regular file so setting it as a directory\n",
2724                                 (unsigned long long)root->objectid,
2725                                 (unsigned long long)rec->ino);
2726                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2727                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2728         } else if (!rec->found_dir_item) {
2729                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2730                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2731         }
2732         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2733         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2734         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2735         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2736         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2737         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2738         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2739         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2740
2741         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2742         BUG_ON(ret);
2743         btrfs_commit_transaction(trans, root);
2744         return 0;
2745 }
2746
2747 static int repair_inode_backrefs(struct btrfs_root *root,
2748                                  struct inode_record *rec,
2749                                  struct cache_tree *inode_cache,
2750                                  int delete)
2751 {
2752         struct inode_backref *tmp, *backref;
2753         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2754         int ret = 0;
2755         int repaired = 0;
2756
2757         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2758                 if (!delete && rec->ino == root_dirid) {
2759                         if (!rec->found_inode_item) {
2760                                 ret = create_inode_item(root, rec, 1);
2761                                 if (ret)
2762                                         break;
2763                                 repaired++;
2764                         }
2765                 }
2766
2767                 /* Index 0 for root dir's are special, don't mess with it */
2768                 if (rec->ino == root_dirid && backref->index == 0)
2769                         continue;
2770
2771                 if (delete &&
2772                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2773                      (backref->found_dir_index && backref->found_inode_ref &&
2774                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2775                         ret = delete_dir_index(root, backref);
2776                         if (ret)
2777                                 break;
2778                         repaired++;
2779                         list_del(&backref->list);
2780                         free(backref);
2781                         continue;
2782                 }
2783
2784                 if (!delete && !backref->found_dir_index &&
2785                     backref->found_dir_item && backref->found_inode_ref) {
2786                         ret = add_missing_dir_index(root, inode_cache, rec,
2787                                                     backref);
2788                         if (ret)
2789                                 break;
2790                         repaired++;
2791                         if (backref->found_dir_item &&
2792                             backref->found_dir_index) {
2793                                 if (!backref->errors &&
2794                                     backref->found_inode_ref) {
2795                                         list_del(&backref->list);
2796                                         free(backref);
2797                                         continue;
2798                                 }
2799                         }
2800                 }
2801
2802                 if (!delete && (!backref->found_dir_index &&
2803                                 !backref->found_dir_item &&
2804                                 backref->found_inode_ref)) {
2805                         struct btrfs_trans_handle *trans;
2806                         struct btrfs_key location;
2807
2808                         ret = check_dir_conflict(root, backref->name,
2809                                                  backref->namelen,
2810                                                  backref->dir,
2811                                                  backref->index);
2812                         if (ret) {
2813                                 /*
2814                                  * let nlink fixing routine to handle it,
2815                                  * which can do it better.
2816                                  */
2817                                 ret = 0;
2818                                 break;
2819                         }
2820                         location.objectid = rec->ino;
2821                         location.type = BTRFS_INODE_ITEM_KEY;
2822                         location.offset = 0;
2823
2824                         trans = btrfs_start_transaction(root, 1);
2825                         if (IS_ERR(trans)) {
2826                                 ret = PTR_ERR(trans);
2827                                 break;
2828                         }
2829                         fprintf(stderr, "adding missing dir index/item pair "
2830                                 "for inode %llu\n",
2831                                 (unsigned long long)rec->ino);
2832                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2833                                                     backref->namelen,
2834                                                     backref->dir, &location,
2835                                                     imode_to_type(rec->imode),
2836                                                     backref->index);
2837                         BUG_ON(ret);
2838                         btrfs_commit_transaction(trans, root);
2839                         repaired++;
2840                 }
2841
2842                 if (!delete && (backref->found_inode_ref &&
2843                                 backref->found_dir_index &&
2844                                 backref->found_dir_item &&
2845                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2846                                 !rec->found_inode_item)) {
2847                         ret = create_inode_item(root, rec, 0);
2848                         if (ret)
2849                                 break;
2850                         repaired++;
2851                 }
2852
2853         }
2854         return ret ? ret : repaired;
2855 }
2856
2857 /*
2858  * To determine the file type for nlink/inode_item repair
2859  *
2860  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2861  * Return -ENOENT if file type is not found.
2862  */
2863 static int find_file_type(struct inode_record *rec, u8 *type)
2864 {
2865         struct inode_backref *backref;
2866
2867         /* For inode item recovered case */
2868         if (rec->found_inode_item) {
2869                 *type = imode_to_type(rec->imode);
2870                 return 0;
2871         }
2872
2873         list_for_each_entry(backref, &rec->backrefs, list) {
2874                 if (backref->found_dir_index || backref->found_dir_item) {
2875                         *type = backref->filetype;
2876                         return 0;
2877                 }
2878         }
2879         return -ENOENT;
2880 }
2881
2882 /*
2883  * To determine the file name for nlink repair
2884  *
2885  * Return 0 if file name is found, set name and namelen.
2886  * Return -ENOENT if file name is not found.
2887  */
2888 static int find_file_name(struct inode_record *rec,
2889                           char *name, int *namelen)
2890 {
2891         struct inode_backref *backref;
2892
2893         list_for_each_entry(backref, &rec->backrefs, list) {
2894                 if (backref->found_dir_index || backref->found_dir_item ||
2895                     backref->found_inode_ref) {
2896                         memcpy(name, backref->name, backref->namelen);
2897                         *namelen = backref->namelen;
2898                         return 0;
2899                 }
2900         }
2901         return -ENOENT;
2902 }
2903
2904 /* Reset the nlink of the inode to the correct one */
2905 static int reset_nlink(struct btrfs_trans_handle *trans,
2906                        struct btrfs_root *root,
2907                        struct btrfs_path *path,
2908                        struct inode_record *rec)
2909 {
2910         struct inode_backref *backref;
2911         struct inode_backref *tmp;
2912         struct btrfs_key key;
2913         struct btrfs_inode_item *inode_item;
2914         int ret = 0;
2915
2916         /* We don't believe this either, reset it and iterate backref */
2917         rec->found_link = 0;
2918
2919         /* Remove all backref including the valid ones */
2920         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2921                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2922                                    backref->index, backref->name,
2923                                    backref->namelen, 0);
2924                 if (ret < 0)
2925                         goto out;
2926
2927                 /* remove invalid backref, so it won't be added back */
2928                 if (!(backref->found_dir_index &&
2929                       backref->found_dir_item &&
2930                       backref->found_inode_ref)) {
2931                         list_del(&backref->list);
2932                         free(backref);
2933                 } else {
2934                         rec->found_link++;
2935                 }
2936         }
2937
2938         /* Set nlink to 0 */
2939         key.objectid = rec->ino;
2940         key.type = BTRFS_INODE_ITEM_KEY;
2941         key.offset = 0;
2942         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2943         if (ret < 0)
2944                 goto out;
2945         if (ret > 0) {
2946                 ret = -ENOENT;
2947                 goto out;
2948         }
2949         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2950                                     struct btrfs_inode_item);
2951         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2952         btrfs_mark_buffer_dirty(path->nodes[0]);
2953         btrfs_release_path(path);
2954
2955         /*
2956          * Add back valid inode_ref/dir_item/dir_index,
2957          * add_link() will handle the nlink inc, so new nlink must be correct
2958          */
2959         list_for_each_entry(backref, &rec->backrefs, list) {
2960                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2961                                      backref->name, backref->namelen,
2962                                      backref->filetype, &backref->index, 1);
2963                 if (ret < 0)
2964                         goto out;
2965         }
2966 out:
2967         btrfs_release_path(path);
2968         return ret;
2969 }
2970
2971 static int get_highest_inode(struct btrfs_trans_handle *trans,
2972                                 struct btrfs_root *root,
2973                                 struct btrfs_path *path,
2974                                 u64 *highest_ino)
2975 {
2976         struct btrfs_key key, found_key;
2977         int ret;
2978
2979         btrfs_init_path(path);
2980         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2981         key.offset = -1;
2982         key.type = BTRFS_INODE_ITEM_KEY;
2983         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2984         if (ret == 1) {
2985                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2986                                 path->slots[0] - 1);
2987                 *highest_ino = found_key.objectid;
2988                 ret = 0;
2989         }
2990         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2991                 ret = -EOVERFLOW;
2992         btrfs_release_path(path);
2993         return ret;
2994 }
2995
2996 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2997                                struct btrfs_root *root,
2998                                struct btrfs_path *path,
2999                                struct inode_record *rec)
3000 {
3001         char *dir_name = "lost+found";
3002         char namebuf[BTRFS_NAME_LEN] = {0};
3003         u64 lost_found_ino;
3004         u32 mode = 0700;
3005         u8 type = 0;
3006         int namelen = 0;
3007         int name_recovered = 0;
3008         int type_recovered = 0;
3009         int ret = 0;
3010
3011         /*
3012          * Get file name and type first before these invalid inode ref
3013          * are deleted by remove_all_invalid_backref()
3014          */
3015         name_recovered = !find_file_name(rec, namebuf, &namelen);
3016         type_recovered = !find_file_type(rec, &type);
3017
3018         if (!name_recovered) {
3019                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3020                        rec->ino, rec->ino);
3021                 namelen = count_digits(rec->ino);
3022                 sprintf(namebuf, "%llu", rec->ino);
3023                 name_recovered = 1;
3024         }
3025         if (!type_recovered) {
3026                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3027                        rec->ino);
3028                 type = BTRFS_FT_REG_FILE;
3029                 type_recovered = 1;
3030         }
3031
3032         ret = reset_nlink(trans, root, path, rec);
3033         if (ret < 0) {
3034                 fprintf(stderr,
3035                         "Failed to reset nlink for inode %llu: %s\n",
3036                         rec->ino, strerror(-ret));
3037                 goto out;
3038         }
3039
3040         if (rec->found_link == 0) {
3041                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3042                 if (ret < 0)
3043                         goto out;
3044                 lost_found_ino++;
3045                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3046                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3047                                   mode);
3048                 if (ret < 0) {
3049                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
3050                                 dir_name, strerror(-ret));
3051                         goto out;
3052                 }
3053                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3054                                      namebuf, namelen, type, NULL, 1);
3055                 /*
3056                  * Add ".INO" suffix several times to handle case where
3057                  * "FILENAME.INO" is already taken by another file.
3058                  */
3059                 while (ret == -EEXIST) {
3060                         /*
3061                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
3062                          */
3063                         if (namelen + count_digits(rec->ino) + 1 >
3064                             BTRFS_NAME_LEN) {
3065                                 ret = -EFBIG;
3066                                 goto out;
3067                         }
3068                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3069                                  ".%llu", rec->ino);
3070                         namelen += count_digits(rec->ino) + 1;
3071                         ret = btrfs_add_link(trans, root, rec->ino,
3072                                              lost_found_ino, namebuf,
3073                                              namelen, type, NULL, 1);
3074                 }
3075                 if (ret < 0) {
3076                         fprintf(stderr,
3077                                 "Failed to link the inode %llu to %s dir: %s\n",
3078                                 rec->ino, dir_name, strerror(-ret));
3079                         goto out;
3080                 }
3081                 /*
3082                  * Just increase the found_link, don't actually add the
3083                  * backref. This will make things easier and this inode
3084                  * record will be freed after the repair is done.
3085                  * So fsck will not report problem about this inode.
3086                  */
3087                 rec->found_link++;
3088                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3089                        namelen, namebuf, dir_name);
3090         }
3091         printf("Fixed the nlink of inode %llu\n", rec->ino);
3092 out:
3093         /*
3094          * Clear the flag anyway, or we will loop forever for the same inode
3095          * as it will not be removed from the bad inode list and the dead loop
3096          * happens.
3097          */
3098         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3099         btrfs_release_path(path);
3100         return ret;
3101 }
3102
3103 /*
3104  * Check if there is any normal(reg or prealloc) file extent for given
3105  * ino.
3106  * This is used to determine the file type when neither its dir_index/item or
3107  * inode_item exists.
3108  *
3109  * This will *NOT* report error, if any error happens, just consider it does
3110  * not have any normal file extent.
3111  */
3112 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3113 {
3114         struct btrfs_path path;
3115         struct btrfs_key key;
3116         struct btrfs_key found_key;
3117         struct btrfs_file_extent_item *fi;
3118         u8 type;
3119         int ret = 0;
3120
3121         btrfs_init_path(&path);
3122         key.objectid = ino;
3123         key.type = BTRFS_EXTENT_DATA_KEY;
3124         key.offset = 0;
3125
3126         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3127         if (ret < 0) {
3128                 ret = 0;
3129                 goto out;
3130         }
3131         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3132                 ret = btrfs_next_leaf(root, &path);
3133                 if (ret) {
3134                         ret = 0;
3135                         goto out;
3136                 }
3137         }
3138         while (1) {
3139                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3140                                       path.slots[0]);
3141                 if (found_key.objectid != ino ||
3142                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3143                         break;
3144                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3145                                     struct btrfs_file_extent_item);
3146                 type = btrfs_file_extent_type(path.nodes[0], fi);
3147                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3148                         ret = 1;
3149                         goto out;
3150                 }
3151         }
3152 out:
3153         btrfs_release_path(&path);
3154         return ret;
3155 }
3156
3157 static u32 btrfs_type_to_imode(u8 type)
3158 {
3159         static u32 imode_by_btrfs_type[] = {
3160                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3161                 [BTRFS_FT_DIR]          = S_IFDIR,
3162                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3163                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3164                 [BTRFS_FT_FIFO]         = S_IFIFO,
3165                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3166                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3167         };
3168
3169         return imode_by_btrfs_type[(type)];
3170 }
3171
3172 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3173                                 struct btrfs_root *root,
3174                                 struct btrfs_path *path,
3175                                 struct inode_record *rec)
3176 {
3177         u8 filetype;
3178         u32 mode = 0700;
3179         int type_recovered = 0;
3180         int ret = 0;
3181
3182         printf("Trying to rebuild inode:%llu\n", rec->ino);
3183
3184         type_recovered = !find_file_type(rec, &filetype);
3185
3186         /*
3187          * Try to determine inode type if type not found.
3188          *
3189          * For found regular file extent, it must be FILE.
3190          * For found dir_item/index, it must be DIR.
3191          *
3192          * For undetermined one, use FILE as fallback.
3193          *
3194          * TODO:
3195          * 1. If found backref(inode_index/item is already handled) to it,
3196          *    it must be DIR.
3197          *    Need new inode-inode ref structure to allow search for that.
3198          */
3199         if (!type_recovered) {
3200                 if (rec->found_file_extent &&
3201                     find_normal_file_extent(root, rec->ino)) {
3202                         type_recovered = 1;
3203                         filetype = BTRFS_FT_REG_FILE;
3204                 } else if (rec->found_dir_item) {
3205                         type_recovered = 1;
3206                         filetype = BTRFS_FT_DIR;
3207                 } else if (!list_empty(&rec->orphan_extents)) {
3208                         type_recovered = 1;
3209                         filetype = BTRFS_FT_REG_FILE;
3210                 } else{
3211                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3212                                rec->ino);
3213                         type_recovered = 1;
3214                         filetype = BTRFS_FT_REG_FILE;
3215                 }
3216         }
3217
3218         ret = btrfs_new_inode(trans, root, rec->ino,
3219                               mode | btrfs_type_to_imode(filetype));
3220         if (ret < 0)
3221                 goto out;
3222
3223         /*
3224          * Here inode rebuild is done, we only rebuild the inode item,
3225          * don't repair the nlink(like move to lost+found).
3226          * That is the job of nlink repair.
3227          *
3228          * We just fill the record and return
3229          */
3230         rec->found_dir_item = 1;
3231         rec->imode = mode | btrfs_type_to_imode(filetype);
3232         rec->nlink = 0;
3233         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3234         /* Ensure the inode_nlinks repair function will be called */
3235         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3236 out:
3237         return ret;
3238 }
3239
3240 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3241                                       struct btrfs_root *root,
3242                                       struct btrfs_path *path,
3243                                       struct inode_record *rec)
3244 {
3245         struct orphan_data_extent *orphan;
3246         struct orphan_data_extent *tmp;
3247         int ret = 0;
3248
3249         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3250                 /*
3251                  * Check for conflicting file extents
3252                  *
3253                  * Here we don't know whether the extents is compressed or not,
3254                  * so we can only assume it not compressed nor data offset,
3255                  * and use its disk_len as extent length.
3256                  */
3257                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3258                                        orphan->offset, orphan->disk_len, 0);
3259                 btrfs_release_path(path);
3260                 if (ret < 0)
3261                         goto out;
3262                 if (!ret) {
3263                         fprintf(stderr,
3264                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3265                                 orphan->disk_bytenr, orphan->disk_len);
3266                         ret = btrfs_free_extent(trans,
3267                                         root->fs_info->extent_root,
3268                                         orphan->disk_bytenr, orphan->disk_len,
3269                                         0, root->objectid, orphan->objectid,
3270                                         orphan->offset);
3271                         if (ret < 0)
3272                                 goto out;
3273                 }
3274                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3275                                 orphan->offset, orphan->disk_bytenr,
3276                                 orphan->disk_len, orphan->disk_len);
3277                 if (ret < 0)
3278                         goto out;
3279
3280                 /* Update file size info */
3281                 rec->found_size += orphan->disk_len;
3282                 if (rec->found_size == rec->nbytes)
3283                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3284
3285                 /* Update the file extent hole info too */
3286                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3287                                            orphan->disk_len);
3288                 if (ret < 0)
3289                         goto out;
3290                 if (RB_EMPTY_ROOT(&rec->holes))
3291                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3292
3293                 list_del(&orphan->list);
3294                 free(orphan);
3295         }
3296         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3297 out:
3298         return ret;
3299 }
3300
3301 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3302                                         struct btrfs_root *root,
3303                                         struct btrfs_path *path,
3304                                         struct inode_record *rec)
3305 {
3306         struct rb_node *node;
3307         struct file_extent_hole *hole;
3308         int found = 0;
3309         int ret = 0;
3310
3311         node = rb_first(&rec->holes);
3312
3313         while (node) {
3314                 found = 1;
3315                 hole = rb_entry(node, struct file_extent_hole, node);
3316                 ret = btrfs_punch_hole(trans, root, rec->ino,
3317                                        hole->start, hole->len);
3318                 if (ret < 0)
3319                         goto out;
3320                 ret = del_file_extent_hole(&rec->holes, hole->start,
3321                                            hole->len);
3322                 if (ret < 0)
3323                         goto out;
3324                 if (RB_EMPTY_ROOT(&rec->holes))
3325                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3326                 node = rb_first(&rec->holes);
3327         }
3328         /* special case for a file losing all its file extent */
3329         if (!found) {
3330                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3331                                        round_up(rec->isize,
3332                                                 root->fs_info->sectorsize));
3333                 if (ret < 0)
3334                         goto out;
3335         }
3336         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3337                rec->ino, root->objectid);
3338 out:
3339         return ret;
3340 }
3341
3342 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3343 {
3344         struct btrfs_trans_handle *trans;
3345         struct btrfs_path path;
3346         int ret = 0;
3347
3348         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3349                              I_ERR_NO_ORPHAN_ITEM |
3350                              I_ERR_LINK_COUNT_WRONG |
3351                              I_ERR_NO_INODE_ITEM |
3352                              I_ERR_FILE_EXTENT_ORPHAN |
3353                              I_ERR_FILE_EXTENT_DISCOUNT|
3354                              I_ERR_FILE_NBYTES_WRONG)))
3355                 return rec->errors;
3356
3357         /*
3358          * For nlink repair, it may create a dir and add link, so
3359          * 2 for parent(256)'s dir_index and dir_item
3360          * 2 for lost+found dir's inode_item and inode_ref
3361          * 1 for the new inode_ref of the file
3362          * 2 for lost+found dir's dir_index and dir_item for the file
3363          */
3364         trans = btrfs_start_transaction(root, 7);
3365         if (IS_ERR(trans))
3366                 return PTR_ERR(trans);
3367
3368         btrfs_init_path(&path);
3369         if (rec->errors & I_ERR_NO_INODE_ITEM)
3370                 ret = repair_inode_no_item(trans, root, &path, rec);
3371         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3372                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3373         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3374                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3375         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3376                 ret = repair_inode_isize(trans, root, &path, rec);
3377         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3378                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3379         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3380                 ret = repair_inode_nlinks(trans, root, &path, rec);
3381         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3382                 ret = repair_inode_nbytes(trans, root, &path, rec);
3383         btrfs_commit_transaction(trans, root);
3384         btrfs_release_path(&path);
3385         return ret;
3386 }
3387
3388 static int check_inode_recs(struct btrfs_root *root,
3389                             struct cache_tree *inode_cache)
3390 {
3391         struct cache_extent *cache;
3392         struct ptr_node *node;
3393         struct inode_record *rec;
3394         struct inode_backref *backref;
3395         int stage = 0;
3396         int ret = 0;
3397         int err = 0;
3398         u64 error = 0;
3399         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3400
3401         if (btrfs_root_refs(&root->root_item) == 0) {
3402                 if (!cache_tree_empty(inode_cache))
3403                         fprintf(stderr, "warning line %d\n", __LINE__);
3404                 return 0;
3405         }
3406
3407         /*
3408          * We need to repair backrefs first because we could change some of the
3409          * errors in the inode recs.
3410          *
3411          * We also need to go through and delete invalid backrefs first and then
3412          * add the correct ones second.  We do this because we may get EEXIST
3413          * when adding back the correct index because we hadn't yet deleted the
3414          * invalid index.
3415          *
3416          * For example, if we were missing a dir index then the directories
3417          * isize would be wrong, so if we fixed the isize to what we thought it
3418          * would be and then fixed the backref we'd still have a invalid fs, so
3419          * we need to add back the dir index and then check to see if the isize
3420          * is still wrong.
3421          */
3422         while (stage < 3) {
3423                 stage++;
3424                 if (stage == 3 && !err)
3425                         break;
3426
3427                 cache = search_cache_extent(inode_cache, 0);
3428                 while (repair && cache) {
3429                         node = container_of(cache, struct ptr_node, cache);
3430                         rec = node->data;
3431                         cache = next_cache_extent(cache);
3432
3433                         /* Need to free everything up and rescan */
3434                         if (stage == 3) {
3435                                 remove_cache_extent(inode_cache, &node->cache);
3436                                 free(node);
3437                                 free_inode_rec(rec);
3438                                 continue;
3439                         }
3440
3441                         if (list_empty(&rec->backrefs))
3442                                 continue;
3443
3444                         ret = repair_inode_backrefs(root, rec, inode_cache,
3445                                                     stage == 1);
3446                         if (ret < 0) {
3447                                 err = ret;
3448                                 stage = 2;
3449                                 break;
3450                         } if (ret > 0) {
3451                                 err = -EAGAIN;
3452                         }
3453                 }
3454         }
3455         if (err)
3456                 return err;
3457
3458         rec = get_inode_rec(inode_cache, root_dirid, 0);
3459         BUG_ON(IS_ERR(rec));
3460         if (rec) {
3461                 ret = check_root_dir(rec);
3462                 if (ret) {
3463                         fprintf(stderr, "root %llu root dir %llu error\n",
3464                                 (unsigned long long)root->root_key.objectid,
3465                                 (unsigned long long)root_dirid);
3466                         print_inode_error(root, rec);
3467                         error++;
3468                 }
3469         } else {
3470                 if (repair) {
3471                         struct btrfs_trans_handle *trans;
3472
3473                         trans = btrfs_start_transaction(root, 1);
3474                         if (IS_ERR(trans)) {
3475                                 err = PTR_ERR(trans);
3476                                 return err;
3477                         }
3478
3479                         fprintf(stderr,
3480                                 "root %llu missing its root dir, recreating\n",
3481                                 (unsigned long long)root->objectid);
3482
3483                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3484                         BUG_ON(ret);
3485
3486                         btrfs_commit_transaction(trans, root);
3487                         return -EAGAIN;
3488                 }
3489
3490                 fprintf(stderr, "root %llu root dir %llu not found\n",
3491                         (unsigned long long)root->root_key.objectid,
3492                         (unsigned long long)root_dirid);
3493         }
3494
3495         while (1) {
3496                 cache = search_cache_extent(inode_cache, 0);
3497                 if (!cache)
3498                         break;
3499                 node = container_of(cache, struct ptr_node, cache);
3500                 rec = node->data;
3501                 remove_cache_extent(inode_cache, &node->cache);
3502                 free(node);
3503                 if (rec->ino == root_dirid ||
3504                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3505                         free_inode_rec(rec);
3506                         continue;
3507                 }
3508
3509                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3510                         ret = check_orphan_item(root, rec->ino);
3511                         if (ret == 0)
3512                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3513                         if (can_free_inode_rec(rec)) {
3514                                 free_inode_rec(rec);
3515                                 continue;
3516                         }
3517                 }
3518
3519                 if (!rec->found_inode_item)
3520                         rec->errors |= I_ERR_NO_INODE_ITEM;
3521                 if (rec->found_link != rec->nlink)
3522                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3523                 if (repair) {
3524                         ret = try_repair_inode(root, rec);
3525                         if (ret == 0 && can_free_inode_rec(rec)) {
3526                                 free_inode_rec(rec);
3527                                 continue;
3528                         }
3529                         ret = 0;
3530                 }
3531
3532                 if (!(repair && ret == 0))
3533                         error++;
3534                 print_inode_error(root, rec);
3535                 list_for_each_entry(backref, &rec->backrefs, list) {
3536                         if (!backref->found_dir_item)
3537                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3538                         if (!backref->found_dir_index)
3539                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3540                         if (!backref->found_inode_ref)
3541                                 backref->errors |= REF_ERR_NO_INODE_REF;
3542                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3543                                 " namelen %u name %s filetype %d errors %x",
3544                                 (unsigned long long)backref->dir,
3545                                 (unsigned long long)backref->index,
3546                                 backref->namelen, backref->name,
3547                                 backref->filetype, backref->errors);
3548                         print_ref_error(backref->errors);
3549                 }
3550                 free_inode_rec(rec);
3551         }
3552         return (error > 0) ? -1 : 0;
3553 }
3554
3555 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3556                                         u64 objectid)
3557 {
3558         struct cache_extent *cache;
3559         struct root_record *rec = NULL;
3560         int ret;
3561
3562         cache = lookup_cache_extent(root_cache, objectid, 1);
3563         if (cache) {
3564                 rec = container_of(cache, struct root_record, cache);
3565         } else {
3566                 rec = calloc(1, sizeof(*rec));
3567                 if (!rec)
3568                         return ERR_PTR(-ENOMEM);
3569                 rec->objectid = objectid;
3570                 INIT_LIST_HEAD(&rec->backrefs);
3571                 rec->cache.start = objectid;
3572                 rec->cache.size = 1;
3573
3574                 ret = insert_cache_extent(root_cache, &rec->cache);
3575                 if (ret)
3576                         return ERR_PTR(-EEXIST);
3577         }
3578         return rec;
3579 }
3580
3581 static struct root_backref *get_root_backref(struct root_record *rec,
3582                                              u64 ref_root, u64 dir, u64 index,
3583                                              const char *name, int namelen)
3584 {
3585         struct root_backref *backref;
3586
3587         list_for_each_entry(backref, &rec->backrefs, list) {
3588                 if (backref->ref_root != ref_root || backref->dir != dir ||
3589                     backref->namelen != namelen)
3590                         continue;
3591                 if (memcmp(name, backref->name, namelen))
3592                         continue;
3593                 return backref;
3594         }
3595
3596         backref = calloc(1, sizeof(*backref) + namelen + 1);
3597         if (!backref)
3598                 return NULL;
3599         backref->ref_root = ref_root;
3600         backref->dir = dir;
3601         backref->index = index;
3602         backref->namelen = namelen;
3603         memcpy(backref->name, name, namelen);
3604         backref->name[namelen] = '\0';
3605         list_add_tail(&backref->list, &rec->backrefs);
3606         return backref;
3607 }
3608
3609 static void free_root_record(struct cache_extent *cache)
3610 {
3611         struct root_record *rec;
3612         struct root_backref *backref;
3613
3614         rec = container_of(cache, struct root_record, cache);
3615         while (!list_empty(&rec->backrefs)) {
3616                 backref = to_root_backref(rec->backrefs.next);
3617                 list_del(&backref->list);
3618                 free(backref);
3619         }
3620
3621         free(rec);
3622 }
3623
3624 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3625
3626 static int add_root_backref(struct cache_tree *root_cache,
3627                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3628                             const char *name, int namelen,
3629                             int item_type, int errors)
3630 {
3631         struct root_record *rec;
3632         struct root_backref *backref;
3633
3634         rec = get_root_rec(root_cache, root_id);
3635         BUG_ON(IS_ERR(rec));
3636         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3637         BUG_ON(!backref);
3638
3639         backref->errors |= errors;
3640
3641         if (item_type != BTRFS_DIR_ITEM_KEY) {
3642                 if (backref->found_dir_index || backref->found_back_ref ||
3643                     backref->found_forward_ref) {
3644                         if (backref->index != index)
3645                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3646                 } else {
3647                         backref->index = index;
3648                 }
3649         }
3650
3651         if (item_type == BTRFS_DIR_ITEM_KEY) {
3652                 if (backref->found_forward_ref)
3653                         rec->found_ref++;
3654                 backref->found_dir_item = 1;
3655         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3656                 backref->found_dir_index = 1;
3657         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3658                 if (backref->found_forward_ref)
3659                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3660                 else if (backref->found_dir_item)
3661                         rec->found_ref++;
3662                 backref->found_forward_ref = 1;
3663         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3664                 if (backref->found_back_ref)
3665                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3666                 backref->found_back_ref = 1;
3667         } else {
3668                 BUG_ON(1);
3669         }
3670
3671         if (backref->found_forward_ref && backref->found_dir_item)
3672                 backref->reachable = 1;
3673         return 0;
3674 }
3675
3676 static int merge_root_recs(struct btrfs_root *root,
3677                            struct cache_tree *src_cache,
3678                            struct cache_tree *dst_cache)
3679 {
3680         struct cache_extent *cache;
3681         struct ptr_node *node;
3682         struct inode_record *rec;
3683         struct inode_backref *backref;
3684         int ret = 0;
3685
3686         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3687                 free_inode_recs_tree(src_cache);
3688                 return 0;
3689         }
3690
3691         while (1) {
3692                 cache = search_cache_extent(src_cache, 0);
3693                 if (!cache)
3694                         break;
3695                 node = container_of(cache, struct ptr_node, cache);
3696                 rec = node->data;
3697                 remove_cache_extent(src_cache, &node->cache);
3698                 free(node);
3699
3700                 ret = is_child_root(root, root->objectid, rec->ino);
3701                 if (ret < 0)
3702                         break;
3703                 else if (ret == 0)
3704                         goto skip;
3705
3706                 list_for_each_entry(backref, &rec->backrefs, list) {
3707                         BUG_ON(backref->found_inode_ref);
3708                         if (backref->found_dir_item)
3709                                 add_root_backref(dst_cache, rec->ino,
3710                                         root->root_key.objectid, backref->dir,
3711                                         backref->index, backref->name,
3712                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3713                                         backref->errors);
3714                         if (backref->found_dir_index)
3715                                 add_root_backref(dst_cache, rec->ino,
3716                                         root->root_key.objectid, backref->dir,
3717                                         backref->index, backref->name,
3718                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3719                                         backref->errors);
3720                 }
3721 skip:
3722                 free_inode_rec(rec);
3723         }
3724         if (ret < 0)
3725                 return ret;
3726         return 0;
3727 }
3728
3729 static int check_root_refs(struct btrfs_root *root,
3730                            struct cache_tree *root_cache)
3731 {
3732         struct root_record *rec;
3733         struct root_record *ref_root;
3734         struct root_backref *backref;
3735         struct cache_extent *cache;
3736         int loop = 1;
3737         int ret;
3738         int error;
3739         int errors = 0;
3740
3741         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3742         BUG_ON(IS_ERR(rec));
3743         rec->found_ref = 1;
3744
3745         /* fixme: this can not detect circular references */
3746         while (loop) {
3747                 loop = 0;
3748                 cache = search_cache_extent(root_cache, 0);
3749                 while (1) {
3750                         if (!cache)
3751                                 break;
3752                         rec = container_of(cache, struct root_record, cache);
3753                         cache = next_cache_extent(cache);
3754
3755                         if (rec->found_ref == 0)
3756                                 continue;
3757
3758                         list_for_each_entry(backref, &rec->backrefs, list) {
3759                                 if (!backref->reachable)
3760                                         continue;
3761
3762                                 ref_root = get_root_rec(root_cache,
3763                                                         backref->ref_root);
3764                                 BUG_ON(IS_ERR(ref_root));
3765                                 if (ref_root->found_ref > 0)
3766                                         continue;
3767
3768                                 backref->reachable = 0;
3769                                 rec->found_ref--;
3770                                 if (rec->found_ref == 0)
3771                                         loop = 1;
3772                         }
3773                 }
3774         }
3775
3776         cache = search_cache_extent(root_cache, 0);
3777         while (1) {
3778                 if (!cache)
3779                         break;
3780                 rec = container_of(cache, struct root_record, cache);
3781                 cache = next_cache_extent(cache);
3782
3783                 if (rec->found_ref == 0 &&
3784                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3785                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3786                         ret = check_orphan_item(root->fs_info->tree_root,
3787                                                 rec->objectid);
3788                         if (ret == 0)
3789                                 continue;
3790
3791                         /*
3792                          * If we don't have a root item then we likely just have
3793                          * a dir item in a snapshot for this root but no actual
3794                          * ref key or anything so it's meaningless.
3795                          */
3796                         if (!rec->found_root_item)
3797                                 continue;
3798                         errors++;
3799                         fprintf(stderr, "fs tree %llu not referenced\n",
3800                                 (unsigned long long)rec->objectid);
3801                 }
3802
3803                 error = 0;
3804                 if (rec->found_ref > 0 && !rec->found_root_item)
3805                         error = 1;
3806                 list_for_each_entry(backref, &rec->backrefs, list) {
3807                         if (!backref->found_dir_item)
3808                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3809                         if (!backref->found_dir_index)
3810                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3811                         if (!backref->found_back_ref)
3812                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3813                         if (!backref->found_forward_ref)
3814                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3815                         if (backref->reachable && backref->errors)
3816                                 error = 1;
3817                 }
3818                 if (!error)
3819                         continue;
3820
3821                 errors++;
3822                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3823                         (unsigned long long)rec->objectid, rec->found_ref,
3824                          rec->found_root_item ? "" : "not found");
3825
3826                 list_for_each_entry(backref, &rec->backrefs, list) {
3827                         if (!backref->reachable)
3828                                 continue;
3829                         if (!backref->errors && rec->found_root_item)
3830                                 continue;
3831                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3832                                 " index %llu namelen %u name %s errors %x\n",
3833                                 (unsigned long long)backref->ref_root,
3834                                 (unsigned long long)backref->dir,
3835                                 (unsigned long long)backref->index,
3836                                 backref->namelen, backref->name,
3837                                 backref->errors);
3838                         print_ref_error(backref->errors);
3839                 }
3840         }
3841         return errors > 0 ? 1 : 0;
3842 }
3843
3844 static int process_root_ref(struct extent_buffer *eb, int slot,
3845                             struct btrfs_key *key,
3846                             struct cache_tree *root_cache)
3847 {
3848         u64 dirid;
3849         u64 index;
3850         u32 len;
3851         u32 name_len;
3852         struct btrfs_root_ref *ref;
3853         char namebuf[BTRFS_NAME_LEN];
3854         int error;
3855
3856         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3857
3858         dirid = btrfs_root_ref_dirid(eb, ref);
3859         index = btrfs_root_ref_sequence(eb, ref);
3860         name_len = btrfs_root_ref_name_len(eb, ref);
3861
3862         if (name_len <= BTRFS_NAME_LEN) {
3863                 len = name_len;
3864                 error = 0;
3865         } else {
3866                 len = BTRFS_NAME_LEN;
3867                 error = REF_ERR_NAME_TOO_LONG;
3868         }
3869         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3870
3871         if (key->type == BTRFS_ROOT_REF_KEY) {
3872                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3873                                  index, namebuf, len, key->type, error);
3874         } else {
3875                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3876                                  index, namebuf, len, key->type, error);
3877         }
3878         return 0;
3879 }
3880
3881 static void free_corrupt_block(struct cache_extent *cache)
3882 {
3883         struct btrfs_corrupt_block *corrupt;
3884
3885         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3886         free(corrupt);
3887 }
3888
3889 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3890
3891 /*
3892  * Repair the btree of the given root.
3893  *
3894  * The fix is to remove the node key in corrupt_blocks cache_tree.
3895  * and rebalance the tree.
3896  * After the fix, the btree should be writeable.
3897  */
3898 static int repair_btree(struct btrfs_root *root,
3899                         struct cache_tree *corrupt_blocks)
3900 {
3901         struct btrfs_trans_handle *trans;
3902         struct btrfs_path path;
3903         struct btrfs_corrupt_block *corrupt;
3904         struct cache_extent *cache;
3905         struct btrfs_key key;
3906         u64 offset;
3907         int level;
3908         int ret = 0;
3909
3910         if (cache_tree_empty(corrupt_blocks))
3911                 return 0;
3912
3913         trans = btrfs_start_transaction(root, 1);
3914         if (IS_ERR(trans)) {
3915                 ret = PTR_ERR(trans);
3916                 fprintf(stderr, "Error starting transaction: %s\n",
3917                         strerror(-ret));
3918                 return ret;
3919         }
3920         btrfs_init_path(&path);
3921         cache = first_cache_extent(corrupt_blocks);
3922         while (cache) {
3923                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3924                                        cache);
3925                 level = corrupt->level;
3926                 path.lowest_level = level;
3927                 key.objectid = corrupt->key.objectid;
3928                 key.type = corrupt->key.type;
3929                 key.offset = corrupt->key.offset;
3930
3931                 /*
3932                  * Here we don't want to do any tree balance, since it may
3933                  * cause a balance with corrupted brother leaf/node,
3934                  * so ins_len set to 0 here.
3935                  * Balance will be done after all corrupt node/leaf is deleted.
3936                  */
3937                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3938                 if (ret < 0)
3939                         goto out;
3940                 offset = btrfs_node_blockptr(path.nodes[level],
3941                                              path.slots[level]);
3942
3943                 /* Remove the ptr */
3944                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3945                 if (ret < 0)
3946                         goto out;
3947                 /*
3948                  * Remove the corresponding extent
3949                  * return value is not concerned.
3950                  */
3951                 btrfs_release_path(&path);
3952                 ret = btrfs_free_extent(trans, root, offset,
3953                                 root->fs_info->nodesize, 0,
3954                                 root->root_key.objectid, level - 1, 0);
3955                 cache = next_cache_extent(cache);
3956         }
3957
3958         /* Balance the btree using btrfs_search_slot() */
3959         cache = first_cache_extent(corrupt_blocks);
3960         while (cache) {
3961                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3962                                        cache);
3963                 memcpy(&key, &corrupt->key, sizeof(key));
3964                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3965                 if (ret < 0)
3966                         goto out;
3967                 /* return will always >0 since it won't find the item */
3968                 ret = 0;
3969                 btrfs_release_path(&path);
3970                 cache = next_cache_extent(cache);
3971         }
3972 out:
3973         btrfs_commit_transaction(trans, root);
3974         btrfs_release_path(&path);
3975         return ret;
3976 }
3977
3978 static int check_fs_root(struct btrfs_root *root,
3979                          struct cache_tree *root_cache,
3980                          struct walk_control *wc)
3981 {
3982         int ret = 0;
3983         int err = 0;
3984         int wret;
3985         int level;
3986         struct btrfs_path path;
3987         struct shared_node root_node;
3988         struct root_record *rec;
3989         struct btrfs_root_item *root_item = &root->root_item;
3990         struct cache_tree corrupt_blocks;
3991         struct orphan_data_extent *orphan;
3992         struct orphan_data_extent *tmp;
3993         enum btrfs_tree_block_status status;
3994         struct node_refs nrefs;
3995
3996         /*
3997          * Reuse the corrupt_block cache tree to record corrupted tree block
3998          *
3999          * Unlike the usage in extent tree check, here we do it in a per
4000          * fs/subvol tree base.
4001          */
4002         cache_tree_init(&corrupt_blocks);
4003         root->fs_info->corrupt_blocks = &corrupt_blocks;
4004
4005         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4006                 rec = get_root_rec(root_cache, root->root_key.objectid);
4007                 BUG_ON(IS_ERR(rec));
4008                 if (btrfs_root_refs(root_item) > 0)
4009                         rec->found_root_item = 1;
4010         }
4011
4012         btrfs_init_path(&path);
4013         memset(&root_node, 0, sizeof(root_node));
4014         cache_tree_init(&root_node.root_cache);
4015         cache_tree_init(&root_node.inode_cache);
4016         memset(&nrefs, 0, sizeof(nrefs));
4017
4018         /* Move the orphan extent record to corresponding inode_record */
4019         list_for_each_entry_safe(orphan, tmp,
4020                                  &root->orphan_data_extents, list) {
4021                 struct inode_record *inode;
4022
4023                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4024                                       1);
4025                 BUG_ON(IS_ERR(inode));
4026                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4027                 list_move(&orphan->list, &inode->orphan_extents);
4028         }
4029
4030         level = btrfs_header_level(root->node);
4031         memset(wc->nodes, 0, sizeof(wc->nodes));
4032         wc->nodes[level] = &root_node;
4033         wc->active_node = level;
4034         wc->root_level = level;
4035
4036         /* We may not have checked the root block, lets do that now */
4037         if (btrfs_is_leaf(root->node))
4038                 status = btrfs_check_leaf(root, NULL, root->node);
4039         else
4040                 status = btrfs_check_node(root, NULL, root->node);
4041         if (status != BTRFS_TREE_BLOCK_CLEAN)
4042                 return -EIO;
4043
4044         if (btrfs_root_refs(root_item) > 0 ||
4045             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4046                 path.nodes[level] = root->node;
4047                 extent_buffer_get(root->node);
4048                 path.slots[level] = 0;
4049         } else {
4050                 struct btrfs_key key;
4051                 struct btrfs_disk_key found_key;
4052
4053                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4054                 level = root_item->drop_level;
4055                 path.lowest_level = level;
4056                 if (level > btrfs_header_level(root->node) ||
4057                     level >= BTRFS_MAX_LEVEL) {
4058                         error("ignoring invalid drop level: %u", level);
4059                         goto skip_walking;
4060                 }
4061                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4062                 if (wret < 0)
4063                         goto skip_walking;
4064                 btrfs_node_key(path.nodes[level], &found_key,
4065                                 path.slots[level]);
4066                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4067                                         sizeof(found_key)));
4068         }
4069
4070         while (1) {
4071                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4072                 if (wret < 0)
4073                         ret = wret;
4074                 if (wret != 0)
4075                         break;
4076
4077                 wret = walk_up_tree(root, &path, wc, &level);
4078                 if (wret < 0)
4079                         ret = wret;
4080                 if (wret != 0)
4081                         break;
4082         }
4083 skip_walking:
4084         btrfs_release_path(&path);
4085
4086         if (!cache_tree_empty(&corrupt_blocks)) {
4087                 struct cache_extent *cache;
4088                 struct btrfs_corrupt_block *corrupt;
4089
4090                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4091                        root->root_key.objectid);
4092                 cache = first_cache_extent(&corrupt_blocks);
4093                 while (cache) {
4094                         corrupt = container_of(cache,
4095                                                struct btrfs_corrupt_block,
4096                                                cache);
4097                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4098                                cache->start, corrupt->level,
4099                                corrupt->key.objectid, corrupt->key.type,
4100                                corrupt->key.offset);
4101                         cache = next_cache_extent(cache);
4102                 }
4103                 if (repair) {
4104                         printf("Try to repair the btree for root %llu\n",
4105                                root->root_key.objectid);
4106                         ret = repair_btree(root, &corrupt_blocks);
4107                         if (ret < 0)
4108                                 fprintf(stderr, "Failed to repair btree: %s\n",
4109                                         strerror(-ret));
4110                         if (!ret)
4111                                 printf("Btree for root %llu is fixed\n",
4112                                        root->root_key.objectid);
4113                 }
4114         }
4115
4116         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4117         if (err < 0)
4118                 ret = err;
4119
4120         if (root_node.current) {
4121                 root_node.current->checked = 1;
4122                 maybe_free_inode_rec(&root_node.inode_cache,
4123                                 root_node.current);
4124         }
4125
4126         err = check_inode_recs(root, &root_node.inode_cache);
4127         if (!ret)
4128                 ret = err;
4129
4130         free_corrupt_blocks_tree(&corrupt_blocks);
4131         root->fs_info->corrupt_blocks = NULL;
4132         free_orphan_data_extents(&root->orphan_data_extents);
4133         return ret;
4134 }
4135
4136 static int fs_root_objectid(u64 objectid)
4137 {
4138         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4139             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4140                 return 1;
4141         return is_fstree(objectid);
4142 }
4143
4144 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4145                           struct cache_tree *root_cache)
4146 {
4147         struct btrfs_path path;
4148         struct btrfs_key key;
4149         struct walk_control wc;
4150         struct extent_buffer *leaf, *tree_node;
4151         struct btrfs_root *tmp_root;
4152         struct btrfs_root *tree_root = fs_info->tree_root;
4153         int ret;
4154         int err = 0;
4155
4156         if (ctx.progress_enabled) {
4157                 ctx.tp = TASK_FS_ROOTS;
4158                 task_start(ctx.info);
4159         }
4160
4161         /*
4162          * Just in case we made any changes to the extent tree that weren't
4163          * reflected into the free space cache yet.
4164          */
4165         if (repair)
4166                 reset_cached_block_groups(fs_info);
4167         memset(&wc, 0, sizeof(wc));
4168         cache_tree_init(&wc.shared);
4169         btrfs_init_path(&path);
4170
4171 again:
4172         key.offset = 0;
4173         key.objectid = 0;
4174         key.type = BTRFS_ROOT_ITEM_KEY;
4175         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4176         if (ret < 0) {
4177                 err = 1;
4178                 goto out;
4179         }
4180         tree_node = tree_root->node;
4181         while (1) {
4182                 if (tree_node != tree_root->node) {
4183                         free_root_recs_tree(root_cache);
4184                         btrfs_release_path(&path);
4185                         goto again;
4186                 }
4187                 leaf = path.nodes[0];
4188                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4189                         ret = btrfs_next_leaf(tree_root, &path);
4190                         if (ret) {
4191                                 if (ret < 0)
4192                                         err = 1;
4193                                 break;
4194                         }
4195                         leaf = path.nodes[0];
4196                 }
4197                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4198                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4199                     fs_root_objectid(key.objectid)) {
4200                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4201                                 tmp_root = btrfs_read_fs_root_no_cache(
4202                                                 fs_info, &key);
4203                         } else {
4204                                 key.offset = (u64)-1;
4205                                 tmp_root = btrfs_read_fs_root(
4206                                                 fs_info, &key);
4207                         }
4208                         if (IS_ERR(tmp_root)) {
4209                                 err = 1;
4210                                 goto next;
4211                         }
4212                         ret = check_fs_root(tmp_root, root_cache, &wc);
4213                         if (ret == -EAGAIN) {
4214                                 free_root_recs_tree(root_cache);
4215                                 btrfs_release_path(&path);
4216                                 goto again;
4217                         }
4218                         if (ret)
4219                                 err = 1;
4220                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4221                                 btrfs_free_fs_root(tmp_root);
4222                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4223                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4224                         process_root_ref(leaf, path.slots[0], &key,
4225                                          root_cache);
4226                 }
4227 next:
4228                 path.slots[0]++;
4229         }
4230 out:
4231         btrfs_release_path(&path);
4232         if (err)
4233                 free_extent_cache_tree(&wc.shared);
4234         if (!cache_tree_empty(&wc.shared))
4235                 fprintf(stderr, "warning line %d\n", __LINE__);
4236
4237         task_stop(ctx.info);
4238
4239         return err;
4240 }
4241
4242 /*
4243  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4244  * INODE_REF/INODE_EXTREF match.
4245  *
4246  * @root:       the root of the fs/file tree
4247  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4248  * @key:        the key of the DIR_ITEM/DIR_INDEX
4249  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4250  *              distinguish root_dir between normal dir/file
4251  * @name:       the name in the INODE_REF/INODE_EXTREF
4252  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4253  * @mode:       the st_mode of INODE_ITEM
4254  *
4255  * Return 0 if no error occurred.
4256  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4257  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4258  * dir/file.
4259  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4260  * not match for normal dir/file.
4261  */
4262 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4263                          struct btrfs_key *key, u64 index, char *name,
4264                          u32 namelen, u32 mode)
4265 {
4266         struct btrfs_path path;
4267         struct extent_buffer *node;
4268         struct btrfs_dir_item *di;
4269         struct btrfs_key location;
4270         char namebuf[BTRFS_NAME_LEN] = {0};
4271         u32 total;
4272         u32 cur = 0;
4273         u32 len;
4274         u32 name_len;
4275         u32 data_len;
4276         u8 filetype;
4277         int slot;
4278         int ret;
4279
4280         btrfs_init_path(&path);
4281         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4282         if (ret < 0) {
4283                 ret = DIR_ITEM_MISSING;
4284                 goto out;
4285         }
4286
4287         /* Process root dir and goto out*/
4288         if (index == 0) {
4289                 if (ret == 0) {
4290                         ret = ROOT_DIR_ERROR;
4291                         error(
4292                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4293                                 root->objectid,
4294                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4295                                         "REF" : "EXTREF",
4296                                 ref_key->objectid, ref_key->offset,
4297                                 key->type == BTRFS_DIR_ITEM_KEY ?
4298                                         "DIR_ITEM" : "DIR_INDEX");
4299                 } else {
4300                         ret = 0;
4301                 }
4302
4303                 goto out;
4304         }
4305
4306         /* Process normal file/dir */
4307         if (ret > 0) {
4308                 ret = DIR_ITEM_MISSING;
4309                 error(
4310                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4311                         root->objectid,
4312                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4313                         ref_key->objectid, ref_key->offset,
4314                         key->type == BTRFS_DIR_ITEM_KEY ?
4315                                 "DIR_ITEM" : "DIR_INDEX",
4316                         key->objectid, key->offset, namelen, name,
4317                         imode_to_type(mode));
4318                 goto out;
4319         }
4320
4321         /* Check whether inode_id/filetype/name match */
4322         node = path.nodes[0];
4323         slot = path.slots[0];
4324         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4325         total = btrfs_item_size_nr(node, slot);
4326         while (cur < total) {
4327                 ret = DIR_ITEM_MISMATCH;
4328                 name_len = btrfs_dir_name_len(node, di);
4329                 data_len = btrfs_dir_data_len(node, di);
4330
4331                 btrfs_dir_item_key_to_cpu(node, di, &location);
4332                 if (location.objectid != ref_key->objectid ||
4333                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4334                     location.offset != 0)
4335                         goto next;
4336
4337                 filetype = btrfs_dir_type(node, di);
4338                 if (imode_to_type(mode) != filetype)
4339                         goto next;
4340
4341                 if (cur + sizeof(*di) + name_len > total ||
4342                     name_len > BTRFS_NAME_LEN) {
4343                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4344                                 root->objectid,
4345                                 key->type == BTRFS_DIR_ITEM_KEY ?
4346                                 "DIR_ITEM" : "DIR_INDEX",
4347                                 key->objectid, key->offset, name_len);
4348
4349                         if (cur + sizeof(*di) > total)
4350                                 break;
4351                         len = min_t(u32, total - cur - sizeof(*di),
4352                                     BTRFS_NAME_LEN);
4353                 } else {
4354                         len = name_len;
4355                 }
4356
4357                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4358                 if (len != namelen || strncmp(namebuf, name, len))
4359                         goto next;
4360
4361                 ret = 0;
4362                 goto out;
4363 next:
4364                 len = sizeof(*di) + name_len + data_len;
4365                 di = (struct btrfs_dir_item *)((char *)di + len);
4366                 cur += len;
4367         }
4368         if (ret == DIR_ITEM_MISMATCH)
4369                 error(
4370                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4371                         root->objectid,
4372                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4373                         ref_key->objectid, ref_key->offset,
4374                         key->type == BTRFS_DIR_ITEM_KEY ?
4375                                 "DIR_ITEM" : "DIR_INDEX",
4376                         key->objectid, key->offset, namelen, name,
4377                         imode_to_type(mode));
4378 out:
4379         btrfs_release_path(&path);
4380         return ret;
4381 }
4382
4383 /*
4384  * Traverse the given INODE_REF and call find_dir_item() to find related
4385  * DIR_ITEM/DIR_INDEX.
4386  *
4387  * @root:       the root of the fs/file tree
4388  * @ref_key:    the key of the INODE_REF
4389  * @refs:       the count of INODE_REF
4390  * @mode:       the st_mode of INODE_ITEM
4391  *
4392  * Return 0 if no error occurred.
4393  */
4394 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4395                            struct extent_buffer *node, int slot, u64 *refs,
4396                            int mode)
4397 {
4398         struct btrfs_key key;
4399         struct btrfs_inode_ref *ref;
4400         char namebuf[BTRFS_NAME_LEN] = {0};
4401         u32 total;
4402         u32 cur = 0;
4403         u32 len;
4404         u32 name_len;
4405         u64 index;
4406         int ret, err = 0;
4407
4408         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4409         total = btrfs_item_size_nr(node, slot);
4410
4411 next:
4412         /* Update inode ref count */
4413         (*refs)++;
4414
4415         index = btrfs_inode_ref_index(node, ref);
4416         name_len = btrfs_inode_ref_name_len(node, ref);
4417         if (cur + sizeof(*ref) + name_len > total ||
4418             name_len > BTRFS_NAME_LEN) {
4419                 warning("root %llu INODE_REF[%llu %llu] name too long",
4420                         root->objectid, ref_key->objectid, ref_key->offset);
4421
4422                 if (total < cur + sizeof(*ref))
4423                         goto out;
4424                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4425         } else {
4426                 len = name_len;
4427         }
4428
4429         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4430
4431         /* Check root dir ref name */
4432         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4433                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4434                       root->objectid, ref_key->objectid, ref_key->offset,
4435                       namebuf);
4436                 err |= ROOT_DIR_ERROR;
4437         }
4438
4439         /* Find related DIR_INDEX */
4440         key.objectid = ref_key->offset;
4441         key.type = BTRFS_DIR_INDEX_KEY;
4442         key.offset = index;
4443         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4444         err |= ret;
4445
4446         /* Find related dir_item */
4447         key.objectid = ref_key->offset;
4448         key.type = BTRFS_DIR_ITEM_KEY;
4449         key.offset = btrfs_name_hash(namebuf, len);
4450         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4451         err |= ret;
4452
4453         len = sizeof(*ref) + name_len;
4454         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4455         cur += len;
4456         if (cur < total)
4457                 goto next;
4458
4459 out:
4460         return err;
4461 }
4462
4463 /*
4464  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4465  * DIR_ITEM/DIR_INDEX.
4466  *
4467  * @root:       the root of the fs/file tree
4468  * @ref_key:    the key of the INODE_EXTREF
4469  * @refs:       the count of INODE_EXTREF
4470  * @mode:       the st_mode of INODE_ITEM
4471  *
4472  * Return 0 if no error occurred.
4473  */
4474 static int check_inode_extref(struct btrfs_root *root,
4475                               struct btrfs_key *ref_key,
4476                               struct extent_buffer *node, int slot, u64 *refs,
4477                               int mode)
4478 {
4479         struct btrfs_key key;
4480         struct btrfs_inode_extref *extref;
4481         char namebuf[BTRFS_NAME_LEN] = {0};
4482         u32 total;
4483         u32 cur = 0;
4484         u32 len;
4485         u32 name_len;
4486         u64 index;
4487         u64 parent;
4488         int ret;
4489         int err = 0;
4490
4491         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4492         total = btrfs_item_size_nr(node, slot);
4493
4494 next:
4495         /* update inode ref count */
4496         (*refs)++;
4497         name_len = btrfs_inode_extref_name_len(node, extref);
4498         index = btrfs_inode_extref_index(node, extref);
4499         parent = btrfs_inode_extref_parent(node, extref);
4500         if (name_len <= BTRFS_NAME_LEN) {
4501                 len = name_len;
4502         } else {
4503                 len = BTRFS_NAME_LEN;
4504                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4505                         root->objectid, ref_key->objectid, ref_key->offset);
4506         }
4507         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4508
4509         /* Check root dir ref name */
4510         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4511                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4512                       root->objectid, ref_key->objectid, ref_key->offset,
4513                       namebuf);
4514                 err |= ROOT_DIR_ERROR;
4515         }
4516
4517         /* find related dir_index */
4518         key.objectid = parent;
4519         key.type = BTRFS_DIR_INDEX_KEY;
4520         key.offset = index;
4521         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4522         err |= ret;
4523
4524         /* find related dir_item */
4525         key.objectid = parent;
4526         key.type = BTRFS_DIR_ITEM_KEY;
4527         key.offset = btrfs_name_hash(namebuf, len);
4528         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4529         err |= ret;
4530
4531         len = sizeof(*extref) + name_len;
4532         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4533         cur += len;
4534
4535         if (cur < total)
4536                 goto next;
4537
4538         return err;
4539 }
4540
4541 /*
4542  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4543  * DIR_ITEM/DIR_INDEX match.
4544  *
4545  * @root:       the root of the fs/file tree
4546  * @key:        the key of the INODE_REF/INODE_EXTREF
4547  * @name:       the name in the INODE_REF/INODE_EXTREF
4548  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4549  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4550  * to (u64)-1
4551  * @ext_ref:    the EXTENDED_IREF feature
4552  *
4553  * Return 0 if no error occurred.
4554  * Return >0 for error bitmap
4555  */
4556 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4557                           char *name, int namelen, u64 index,
4558                           unsigned int ext_ref)
4559 {
4560         struct btrfs_path path;
4561         struct btrfs_inode_ref *ref;
4562         struct btrfs_inode_extref *extref;
4563         struct extent_buffer *node;
4564         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4565         u32 total;
4566         u32 cur = 0;
4567         u32 len;
4568         u32 ref_namelen;
4569         u64 ref_index;
4570         u64 parent;
4571         u64 dir_id;
4572         int slot;
4573         int ret;
4574
4575         btrfs_init_path(&path);
4576         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4577         if (ret) {
4578                 ret = INODE_REF_MISSING;
4579                 goto extref;
4580         }
4581
4582         node = path.nodes[0];
4583         slot = path.slots[0];
4584
4585         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4586         total = btrfs_item_size_nr(node, slot);
4587
4588         /* Iterate all entry of INODE_REF */
4589         while (cur < total) {
4590                 ret = INODE_REF_MISSING;
4591
4592                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4593                 ref_index = btrfs_inode_ref_index(node, ref);
4594                 if (index != (u64)-1 && index != ref_index)
4595                         goto next_ref;
4596
4597                 if (cur + sizeof(*ref) + ref_namelen > total ||
4598                     ref_namelen > BTRFS_NAME_LEN) {
4599                         warning("root %llu INODE %s[%llu %llu] name too long",
4600                                 root->objectid,
4601                                 key->type == BTRFS_INODE_REF_KEY ?
4602                                         "REF" : "EXTREF",
4603                                 key->objectid, key->offset);
4604
4605                         if (cur + sizeof(*ref) > total)
4606                                 break;
4607                         len = min_t(u32, total - cur - sizeof(*ref),
4608                                     BTRFS_NAME_LEN);
4609                 } else {
4610                         len = ref_namelen;
4611                 }
4612
4613                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4614                                    len);
4615
4616                 if (len != namelen || strncmp(ref_namebuf, name, len))
4617                         goto next_ref;
4618
4619                 ret = 0;
4620                 goto out;
4621 next_ref:
4622                 len = sizeof(*ref) + ref_namelen;
4623                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4624                 cur += len;
4625         }
4626
4627 extref:
4628         /* Skip if not support EXTENDED_IREF feature */
4629         if (!ext_ref)
4630                 goto out;
4631
4632         btrfs_release_path(&path);
4633         btrfs_init_path(&path);
4634
4635         dir_id = key->offset;
4636         key->type = BTRFS_INODE_EXTREF_KEY;
4637         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4638
4639         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4640         if (ret) {
4641                 ret = INODE_REF_MISSING;
4642                 goto out;
4643         }
4644
4645         node = path.nodes[0];
4646         slot = path.slots[0];
4647
4648         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4649         cur = 0;
4650         total = btrfs_item_size_nr(node, slot);
4651
4652         /* Iterate all entry of INODE_EXTREF */
4653         while (cur < total) {
4654                 ret = INODE_REF_MISSING;
4655
4656                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4657                 ref_index = btrfs_inode_extref_index(node, extref);
4658                 parent = btrfs_inode_extref_parent(node, extref);
4659                 if (index != (u64)-1 && index != ref_index)
4660                         goto next_extref;
4661
4662                 if (parent != dir_id)
4663                         goto next_extref;
4664
4665                 if (ref_namelen <= BTRFS_NAME_LEN) {
4666                         len = ref_namelen;
4667                 } else {
4668                         len = BTRFS_NAME_LEN;
4669                         warning("root %llu INODE %s[%llu %llu] name too long",
4670                                 root->objectid,
4671                                 key->type == BTRFS_INODE_REF_KEY ?
4672                                         "REF" : "EXTREF",
4673                                 key->objectid, key->offset);
4674                 }
4675                 read_extent_buffer(node, ref_namebuf,
4676                                    (unsigned long)(extref + 1), len);
4677
4678                 if (len != namelen || strncmp(ref_namebuf, name, len))
4679                         goto next_extref;
4680
4681                 ret = 0;
4682                 goto out;
4683
4684 next_extref:
4685                 len = sizeof(*extref) + ref_namelen;
4686                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4687                 cur += len;
4688
4689         }
4690 out:
4691         btrfs_release_path(&path);
4692         return ret;
4693 }
4694
4695 /*
4696  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4697  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4698  *
4699  * @root:       the root of the fs/file tree
4700  * @key:        the key of the INODE_REF/INODE_EXTREF
4701  * @size:       the st_size of the INODE_ITEM
4702  * @ext_ref:    the EXTENDED_IREF feature
4703  *
4704  * Return 0 if no error occurred.
4705  */
4706 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4707                           struct extent_buffer *node, int slot, u64 *size,
4708                           unsigned int ext_ref)
4709 {
4710         struct btrfs_dir_item *di;
4711         struct btrfs_inode_item *ii;
4712         struct btrfs_path path;
4713         struct btrfs_key location;
4714         char namebuf[BTRFS_NAME_LEN] = {0};
4715         u32 total;
4716         u32 cur = 0;
4717         u32 len;
4718         u32 name_len;
4719         u32 data_len;
4720         u8 filetype;
4721         u32 mode;
4722         u64 index;
4723         int ret;
4724         int err = 0;
4725
4726         /*
4727          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4728          * ignore index check.
4729          */
4730         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4731
4732         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4733         total = btrfs_item_size_nr(node, slot);
4734
4735         while (cur < total) {
4736                 data_len = btrfs_dir_data_len(node, di);
4737                 if (data_len)
4738                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4739                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4740                               "DIR_ITEM" : "DIR_INDEX",
4741                               key->objectid, key->offset, data_len);
4742
4743                 name_len = btrfs_dir_name_len(node, di);
4744                 if (cur + sizeof(*di) + name_len > total ||
4745                     name_len > BTRFS_NAME_LEN) {
4746                         warning("root %llu %s[%llu %llu] name too long",
4747                                 root->objectid,
4748                                 key->type == BTRFS_DIR_ITEM_KEY ?
4749                                 "DIR_ITEM" : "DIR_INDEX",
4750                                 key->objectid, key->offset);
4751
4752                         if (cur + sizeof(*di) > total)
4753                                 break;
4754                         len = min_t(u32, total - cur - sizeof(*di),
4755                                     BTRFS_NAME_LEN);
4756                 } else {
4757                         len = name_len;
4758                 }
4759                 (*size) += name_len;
4760
4761                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4762                 filetype = btrfs_dir_type(node, di);
4763
4764                 if (key->type == BTRFS_DIR_ITEM_KEY &&
4765                     key->offset != btrfs_name_hash(namebuf, len)) {
4766                         err |= -EIO;
4767                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4768                                 root->objectid, key->objectid, key->offset,
4769                                 namebuf, len, filetype, key->offset,
4770                                 btrfs_name_hash(namebuf, len));
4771                 }
4772
4773                 btrfs_init_path(&path);
4774                 btrfs_dir_item_key_to_cpu(node, di, &location);
4775
4776                 /* Ignore related ROOT_ITEM check */
4777                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4778                         goto next;
4779
4780                 /* Check relative INODE_ITEM(existence/filetype) */
4781                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4782                 if (ret) {
4783                         err |= INODE_ITEM_MISSING;
4784                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4785                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4786                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4787                               key->offset, location.objectid, name_len,
4788                               namebuf, filetype);
4789                         goto next;
4790                 }
4791
4792                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4793                                     struct btrfs_inode_item);
4794                 mode = btrfs_inode_mode(path.nodes[0], ii);
4795
4796                 if (imode_to_type(mode) != filetype) {
4797                         err |= INODE_ITEM_MISMATCH;
4798                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4799                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4800                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4801                               key->offset, name_len, namebuf, filetype);
4802                 }
4803
4804                 /* Check relative INODE_REF/INODE_EXTREF */
4805                 location.type = BTRFS_INODE_REF_KEY;
4806                 location.offset = key->objectid;
4807                 ret = find_inode_ref(root, &location, namebuf, len,
4808                                        index, ext_ref);
4809                 err |= ret;
4810                 if (ret & INODE_REF_MISSING)
4811                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4812                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4813                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4814                               key->offset, name_len, namebuf, filetype);
4815
4816 next:
4817                 btrfs_release_path(&path);
4818                 len = sizeof(*di) + name_len + data_len;
4819                 di = (struct btrfs_dir_item *)((char *)di + len);
4820                 cur += len;
4821
4822                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4823                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4824                               root->objectid, key->objectid, key->offset);
4825                         break;
4826                 }
4827         }
4828
4829         return err;
4830 }
4831
4832 /*
4833  * Check file extent datasum/hole, update the size of the file extents,
4834  * check and update the last offset of the file extent.
4835  *
4836  * @root:       the root of fs/file tree.
4837  * @fkey:       the key of the file extent.
4838  * @nodatasum:  INODE_NODATASUM feature.
4839  * @size:       the sum of all EXTENT_DATA items size for this inode.
4840  * @end:        the offset of the last extent.
4841  *
4842  * Return 0 if no error occurred.
4843  */
4844 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4845                              struct extent_buffer *node, int slot,
4846                              unsigned int nodatasum, u64 *size, u64 *end)
4847 {
4848         struct btrfs_file_extent_item *fi;
4849         u64 disk_bytenr;
4850         u64 disk_num_bytes;
4851         u64 extent_num_bytes;
4852         u64 extent_offset;
4853         u64 csum_found;         /* In byte size, sectorsize aligned */
4854         u64 search_start;       /* Logical range start we search for csum */
4855         u64 search_len;         /* Logical range len we search for csum */
4856         unsigned int extent_type;
4857         unsigned int is_hole;
4858         int compressed = 0;
4859         int ret;
4860         int err = 0;
4861
4862         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4863
4864         /* Check inline extent */
4865         extent_type = btrfs_file_extent_type(node, fi);
4866         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4867                 struct btrfs_item *e = btrfs_item_nr(slot);
4868                 u32 item_inline_len;
4869
4870                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4871                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4872                 compressed = btrfs_file_extent_compression(node, fi);
4873                 if (extent_num_bytes == 0) {
4874                         error(
4875                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4876                                 root->objectid, fkey->objectid, fkey->offset);
4877                         err |= FILE_EXTENT_ERROR;
4878                 }
4879                 if (!compressed && extent_num_bytes != item_inline_len) {
4880                         error(
4881                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4882                                 root->objectid, fkey->objectid, fkey->offset,
4883                                 extent_num_bytes, item_inline_len);
4884                         err |= FILE_EXTENT_ERROR;
4885                 }
4886                 *end += extent_num_bytes;
4887                 *size += extent_num_bytes;
4888                 return err;
4889         }
4890
4891         /* Check extent type */
4892         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4893                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4894                 err |= FILE_EXTENT_ERROR;
4895                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4896                       root->objectid, fkey->objectid, fkey->offset);
4897                 return err;
4898         }
4899
4900         /* Check REG_EXTENT/PREALLOC_EXTENT */
4901         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4902         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4903         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4904         extent_offset = btrfs_file_extent_offset(node, fi);
4905         compressed = btrfs_file_extent_compression(node, fi);
4906         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4907
4908         /*
4909          * Check EXTENT_DATA csum
4910          *
4911          * For plain (uncompressed) extent, we should only check the range
4912          * we're referring to, as it's possible that part of prealloc extent
4913          * has been written, and has csum:
4914          *
4915          * |<--- Original large preallocated extent A ---->|
4916          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4917          *      No csum                         Has csum
4918          *
4919          * For compressed extent, we should check the whole range.
4920          */
4921         if (!compressed) {
4922                 search_start = disk_bytenr + extent_offset;
4923                 search_len = extent_num_bytes;
4924         } else {
4925                 search_start = disk_bytenr;
4926                 search_len = disk_num_bytes;
4927         }
4928         ret = count_csum_range(root, search_start, search_len, &csum_found);
4929         if (csum_found > 0 && nodatasum) {
4930                 err |= ODD_CSUM_ITEM;
4931                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4932                       root->objectid, fkey->objectid, fkey->offset);
4933         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4934                    !is_hole && (ret < 0 || csum_found < search_len)) {
4935                 err |= CSUM_ITEM_MISSING;
4936                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4937                       root->objectid, fkey->objectid, fkey->offset,
4938                       csum_found, search_len);
4939         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4940                 err |= ODD_CSUM_ITEM;
4941                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4942                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4943         }
4944
4945         /* Check EXTENT_DATA hole */
4946         if (!no_holes && *end != fkey->offset) {
4947                 err |= FILE_EXTENT_ERROR;
4948                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4949                       root->objectid, fkey->objectid, fkey->offset);
4950         }
4951
4952         *end += extent_num_bytes;
4953         if (!is_hole)
4954                 *size += extent_num_bytes;
4955
4956         return err;
4957 }
4958
4959 /*
4960  * Set inode item nbytes to @nbytes
4961  *
4962  * Returns  0     on success
4963  * Returns  != 0  on error
4964  */
4965 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
4966                                       struct btrfs_path *path,
4967                                       u64 ino, u64 nbytes)
4968 {
4969         struct btrfs_trans_handle *trans;
4970         struct btrfs_inode_item *ii;
4971         struct btrfs_key key;
4972         struct btrfs_key research_key;
4973         int err = 0;
4974         int ret;
4975
4976         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
4977
4978         key.objectid = ino;
4979         key.type = BTRFS_INODE_ITEM_KEY;
4980         key.offset = 0;
4981
4982         trans = btrfs_start_transaction(root, 1);
4983         if (IS_ERR(trans)) {
4984                 ret = PTR_ERR(trans);
4985                 err |= ret;
4986                 goto out;
4987         }
4988
4989         btrfs_release_path(path);
4990         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4991         if (ret > 0)
4992                 ret = -ENOENT;
4993         if (ret) {
4994                 err |= ret;
4995                 goto fail;
4996         }
4997
4998         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
4999                             struct btrfs_inode_item);
5000         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5001         btrfs_mark_buffer_dirty(path->nodes[0]);
5002 fail:
5003         btrfs_commit_transaction(trans, root);
5004 out:
5005         if (ret)
5006                 error("failed to set nbytes in inode %llu root %llu",
5007                       ino, root->root_key.objectid);
5008         else
5009                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5010                        root->root_key.objectid, nbytes);
5011
5012         /* research path */
5013         btrfs_release_path(path);
5014         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5015         err |= ret;
5016
5017         return err;
5018 }
5019
5020 /*
5021  * Set directory inode isize to @isize.
5022  *
5023  * Returns 0     on success.
5024  * Returns != 0  on error.
5025  */
5026 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5027                                    struct btrfs_path *path,
5028                                    u64 ino, u64 isize)
5029 {
5030         struct btrfs_trans_handle *trans;
5031         struct btrfs_inode_item *ii;
5032         struct btrfs_key key;
5033         struct btrfs_key research_key;
5034         int ret;
5035         int err = 0;
5036
5037         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5038
5039         key.objectid = ino;
5040         key.type = BTRFS_INODE_ITEM_KEY;
5041         key.offset = 0;
5042
5043         trans = btrfs_start_transaction(root, 1);
5044         if (IS_ERR(trans)) {
5045                 ret = PTR_ERR(trans);
5046                 err |= ret;
5047                 goto out;
5048         }
5049
5050         btrfs_release_path(path);
5051         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5052         if (ret > 0)
5053                 ret = -ENOENT;
5054         if (ret) {
5055                 err |= ret;
5056                 goto fail;
5057         }
5058
5059         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5060                             struct btrfs_inode_item);
5061         btrfs_set_inode_size(path->nodes[0], ii, isize);
5062         btrfs_mark_buffer_dirty(path->nodes[0]);
5063 fail:
5064         btrfs_commit_transaction(trans, root);
5065 out:
5066         if (ret)
5067                 error("failed to set isize in inode %llu root %llu",
5068                       ino, root->root_key.objectid);
5069         else
5070                 printf("Set isize in inode %llu root %llu to %llu\n",
5071                        ino, root->root_key.objectid, isize);
5072
5073         btrfs_release_path(path);
5074         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5075         err |= ret;
5076
5077         return err;
5078 }
5079
5080 /*
5081  * Wrapper function for btrfs_add_orphan_item().
5082  *
5083  * Returns 0     on success.
5084  * Returns != 0  on error.
5085  */
5086 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5087                                            struct btrfs_path *path, u64 ino)
5088 {
5089         struct btrfs_trans_handle *trans;
5090         struct btrfs_key research_key;
5091         int ret;
5092         int err = 0;
5093
5094         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5095
5096         trans = btrfs_start_transaction(root, 1);
5097         if (IS_ERR(trans)) {
5098                 ret = PTR_ERR(trans);
5099                 err |= ret;
5100                 goto out;
5101         }
5102
5103         btrfs_release_path(path);
5104         ret = btrfs_add_orphan_item(trans, root, path, ino);
5105         err |= ret;
5106         btrfs_commit_transaction(trans, root);
5107 out:
5108         if (ret)
5109                 error("failed to add inode %llu as orphan item root %llu",
5110                       ino, root->root_key.objectid);
5111         else
5112                 printf("Added inode %llu as orphan item root %llu\n",
5113                        ino, root->root_key.objectid);
5114
5115         btrfs_release_path(path);
5116         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5117         err |= ret;
5118
5119         return err;
5120 }
5121
5122 /*
5123  * Check INODE_ITEM and related ITEMs (the same inode number)
5124  * 1. check link count
5125  * 2. check inode ref/extref
5126  * 3. check dir item/index
5127  *
5128  * @ext_ref:    the EXTENDED_IREF feature
5129  *
5130  * Return 0 if no error occurred.
5131  * Return >0 for error or hit the traversal is done(by error bitmap)
5132  */
5133 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5134                             unsigned int ext_ref)
5135 {
5136         struct extent_buffer *node;
5137         struct btrfs_inode_item *ii;
5138         struct btrfs_key key;
5139         u64 inode_id;
5140         u32 mode;
5141         u64 nlink;
5142         u64 nbytes;
5143         u64 isize;
5144         u64 size = 0;
5145         u64 refs = 0;
5146         u64 extent_end = 0;
5147         u64 extent_size = 0;
5148         unsigned int dir;
5149         unsigned int nodatasum;
5150         int slot;
5151         int ret;
5152         int err = 0;
5153
5154         node = path->nodes[0];
5155         slot = path->slots[0];
5156
5157         btrfs_item_key_to_cpu(node, &key, slot);
5158         inode_id = key.objectid;
5159
5160         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5161                 ret = btrfs_next_item(root, path);
5162                 if (ret > 0)
5163                         err |= LAST_ITEM;
5164                 return err;
5165         }
5166
5167         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5168         isize = btrfs_inode_size(node, ii);
5169         nbytes = btrfs_inode_nbytes(node, ii);
5170         mode = btrfs_inode_mode(node, ii);
5171         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5172         nlink = btrfs_inode_nlink(node, ii);
5173         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5174
5175         while (1) {
5176                 ret = btrfs_next_item(root, path);
5177                 if (ret < 0) {
5178                         /* out will fill 'err' rusing current statistics */
5179                         goto out;
5180                 } else if (ret > 0) {
5181                         err |= LAST_ITEM;
5182                         goto out;
5183                 }
5184
5185                 node = path->nodes[0];
5186                 slot = path->slots[0];
5187                 btrfs_item_key_to_cpu(node, &key, slot);
5188                 if (key.objectid != inode_id)
5189                         goto out;
5190
5191                 switch (key.type) {
5192                 case BTRFS_INODE_REF_KEY:
5193                         ret = check_inode_ref(root, &key, node, slot, &refs,
5194                                               mode);
5195                         err |= ret;
5196                         break;
5197                 case BTRFS_INODE_EXTREF_KEY:
5198                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5199                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5200                                         root->objectid, key.objectid,
5201                                         key.offset);
5202                         ret = check_inode_extref(root, &key, node, slot, &refs,
5203                                                  mode);
5204                         err |= ret;
5205                         break;
5206                 case BTRFS_DIR_ITEM_KEY:
5207                 case BTRFS_DIR_INDEX_KEY:
5208                         if (!dir) {
5209                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5210                                         root->objectid, inode_id,
5211                                         imode_to_type(mode), key.objectid,
5212                                         key.offset);
5213                         }
5214                         ret = check_dir_item(root, &key, node, slot, &size,
5215                                              ext_ref);
5216                         err |= ret;
5217                         break;
5218                 case BTRFS_EXTENT_DATA_KEY:
5219                         if (dir) {
5220                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5221                                         root->objectid, inode_id, key.objectid,
5222                                         key.offset);
5223                         }
5224                         ret = check_file_extent(root, &key, node, slot,
5225                                                 nodatasum, &extent_size,
5226                                                 &extent_end);
5227                         err |= ret;
5228                         break;
5229                 case BTRFS_XATTR_ITEM_KEY:
5230                         break;
5231                 default:
5232                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5233                               key.objectid, key.type, key.offset);
5234                 }
5235         }
5236
5237 out:
5238         /* verify INODE_ITEM nlink/isize/nbytes */
5239         if (dir) {
5240                 if (nlink != 1) {
5241                         err |= LINK_COUNT_ERROR;
5242                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5243                               root->objectid, inode_id, nlink);
5244                 }
5245
5246                 /*
5247                  * Just a warning, as dir inode nbytes is just an
5248                  * instructive value.
5249                  */
5250                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5251                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5252                                 root->objectid, inode_id,
5253                                 root->fs_info->nodesize);
5254                 }
5255
5256                 if (isize != size) {
5257                         if (repair)
5258                                 ret = repair_dir_isize_lowmem(root, path,
5259                                                               inode_id, size);
5260                         if (!repair || ret) {
5261                                 err |= ISIZE_ERROR;
5262                                 error(
5263                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5264                                       root->objectid, inode_id, isize, size);
5265                         }
5266                 }
5267         } else {
5268                 if (nlink != refs) {
5269                         err |= LINK_COUNT_ERROR;
5270                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5271                               root->objectid, inode_id, nlink, refs);
5272                 } else if (!nlink) {
5273                         if (repair)
5274                                 ret = repair_inode_orphan_item_lowmem(root,
5275                                                               path, inode_id);
5276                         if (!repair || ret) {
5277                                 err |= ORPHAN_ITEM;
5278                                 error("root %llu INODE[%llu] is orphan item",
5279                                       root->objectid, inode_id);
5280                         }
5281                 }
5282
5283                 if (!nbytes && !no_holes && extent_end < isize) {
5284                         err |= NBYTES_ERROR;
5285                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5286                               root->objectid, inode_id, isize);
5287                 }
5288
5289                 if (nbytes != extent_size) {
5290                         if (repair)
5291                                 ret = repair_inode_nbytes_lowmem(root, path,
5292                                                          inode_id, extent_size);
5293                         if (!repair || ret) {
5294                                 err |= NBYTES_ERROR;
5295                                 error(
5296         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5297                                       root->objectid, inode_id, nbytes,
5298                                       extent_size);
5299                         }
5300                 }
5301         }
5302
5303         return err;
5304 }
5305
5306 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5307 {
5308         struct btrfs_path path;
5309         struct btrfs_key key;
5310         int err = 0;
5311         int ret;
5312
5313         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5314         key.type = BTRFS_INODE_ITEM_KEY;
5315         key.offset = 0;
5316
5317         /* For root being dropped, we don't need to check first inode */
5318         if (btrfs_root_refs(&root->root_item) == 0 &&
5319             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5320             key.objectid)
5321                 return 0;
5322
5323         btrfs_init_path(&path);
5324
5325         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5326         if (ret < 0)
5327                 goto out;
5328         if (ret > 0) {
5329                 ret = 0;
5330                 err |= INODE_ITEM_MISSING;
5331                 error("first inode item of root %llu is missing",
5332                       root->objectid);
5333         }
5334
5335         err |= check_inode_item(root, &path, ext_ref);
5336         err &= ~LAST_ITEM;
5337         if (err && !ret)
5338                 ret = -EIO;
5339 out:
5340         btrfs_release_path(&path);
5341         return ret;
5342 }
5343
5344 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5345                                                 u64 parent, u64 root)
5346 {
5347         struct rb_node *node;
5348         struct tree_backref *back = NULL;
5349         struct tree_backref match = {
5350                 .node = {
5351                         .is_data = 0,
5352                 },
5353         };
5354
5355         if (parent) {
5356                 match.parent = parent;
5357                 match.node.full_backref = 1;
5358         } else {
5359                 match.root = root;
5360         }
5361
5362         node = rb_search(&rec->backref_tree, &match.node.node,
5363                          (rb_compare_keys)compare_extent_backref, NULL);
5364         if (node)
5365                 back = to_tree_backref(rb_node_to_extent_backref(node));
5366
5367         return back;
5368 }
5369
5370 static struct data_backref *find_data_backref(struct extent_record *rec,
5371                                                 u64 parent, u64 root,
5372                                                 u64 owner, u64 offset,
5373                                                 int found_ref,
5374                                                 u64 disk_bytenr, u64 bytes)
5375 {
5376         struct rb_node *node;
5377         struct data_backref *back = NULL;
5378         struct data_backref match = {
5379                 .node = {
5380                         .is_data = 1,
5381                 },
5382                 .owner = owner,
5383                 .offset = offset,
5384                 .bytes = bytes,
5385                 .found_ref = found_ref,
5386                 .disk_bytenr = disk_bytenr,
5387         };
5388
5389         if (parent) {
5390                 match.parent = parent;
5391                 match.node.full_backref = 1;
5392         } else {
5393                 match.root = root;
5394         }
5395
5396         node = rb_search(&rec->backref_tree, &match.node.node,
5397                          (rb_compare_keys)compare_extent_backref, NULL);
5398         if (node)
5399                 back = to_data_backref(rb_node_to_extent_backref(node));
5400
5401         return back;
5402 }
5403 /*
5404  * Iterate all item on the tree and call check_inode_item() to check.
5405  *
5406  * @root:       the root of the tree to be checked.
5407  * @ext_ref:    the EXTENDED_IREF feature
5408  *
5409  * Return 0 if no error found.
5410  * Return <0 for error.
5411  */
5412 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5413 {
5414         struct btrfs_path path;
5415         struct node_refs nrefs;
5416         struct btrfs_root_item *root_item = &root->root_item;
5417         int ret;
5418         int level;
5419         int err = 0;
5420
5421         /*
5422          * We need to manually check the first inode item(256)
5423          * As the following traversal function will only start from
5424          * the first inode item in the leaf, if inode item(256) is missing
5425          * we will just skip it forever.
5426          */
5427         ret = check_fs_first_inode(root, ext_ref);
5428         if (ret < 0)
5429                 return ret;
5430
5431         memset(&nrefs, 0, sizeof(nrefs));
5432         level = btrfs_header_level(root->node);
5433         btrfs_init_path(&path);
5434
5435         if (btrfs_root_refs(root_item) > 0 ||
5436             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5437                 path.nodes[level] = root->node;
5438                 path.slots[level] = 0;
5439                 extent_buffer_get(root->node);
5440         } else {
5441                 struct btrfs_key key;
5442
5443                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5444                 level = root_item->drop_level;
5445                 path.lowest_level = level;
5446                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5447                 if (ret < 0)
5448                         goto out;
5449                 ret = 0;
5450         }
5451
5452         while (1) {
5453                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5454                 err |= !!ret;
5455
5456                 /* if ret is negative, walk shall stop */
5457                 if (ret < 0) {
5458                         ret = err;
5459                         break;
5460                 }
5461
5462                 ret = walk_up_tree_v2(root, &path, &level);
5463                 if (ret != 0) {
5464                         /* Normal exit, reset ret to err */
5465                         ret = err;
5466                         break;
5467                 }
5468         }
5469
5470 out:
5471         btrfs_release_path(&path);
5472         return ret;
5473 }
5474
5475 /*
5476  * Find the relative ref for root_ref and root_backref.
5477  *
5478  * @root:       the root of the root tree.
5479  * @ref_key:    the key of the root ref.
5480  *
5481  * Return 0 if no error occurred.
5482  */
5483 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5484                           struct extent_buffer *node, int slot)
5485 {
5486         struct btrfs_path path;
5487         struct btrfs_key key;
5488         struct btrfs_root_ref *ref;
5489         struct btrfs_root_ref *backref;
5490         char ref_name[BTRFS_NAME_LEN] = {0};
5491         char backref_name[BTRFS_NAME_LEN] = {0};
5492         u64 ref_dirid;
5493         u64 ref_seq;
5494         u32 ref_namelen;
5495         u64 backref_dirid;
5496         u64 backref_seq;
5497         u32 backref_namelen;
5498         u32 len;
5499         int ret;
5500         int err = 0;
5501
5502         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5503         ref_dirid = btrfs_root_ref_dirid(node, ref);
5504         ref_seq = btrfs_root_ref_sequence(node, ref);
5505         ref_namelen = btrfs_root_ref_name_len(node, ref);
5506
5507         if (ref_namelen <= BTRFS_NAME_LEN) {
5508                 len = ref_namelen;
5509         } else {
5510                 len = BTRFS_NAME_LEN;
5511                 warning("%s[%llu %llu] ref_name too long",
5512                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5513                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5514                         ref_key->offset);
5515         }
5516         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5517
5518         /* Find relative root_ref */
5519         key.objectid = ref_key->offset;
5520         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5521         key.offset = ref_key->objectid;
5522
5523         btrfs_init_path(&path);
5524         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5525         if (ret) {
5526                 err |= ROOT_REF_MISSING;
5527                 error("%s[%llu %llu] couldn't find relative ref",
5528                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5529                       "ROOT_REF" : "ROOT_BACKREF",
5530                       ref_key->objectid, ref_key->offset);
5531                 goto out;
5532         }
5533
5534         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5535                                  struct btrfs_root_ref);
5536         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5537         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5538         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5539
5540         if (backref_namelen <= BTRFS_NAME_LEN) {
5541                 len = backref_namelen;
5542         } else {
5543                 len = BTRFS_NAME_LEN;
5544                 warning("%s[%llu %llu] ref_name too long",
5545                         key.type == BTRFS_ROOT_REF_KEY ?
5546                         "ROOT_REF" : "ROOT_BACKREF",
5547                         key.objectid, key.offset);
5548         }
5549         read_extent_buffer(path.nodes[0], backref_name,
5550                            (unsigned long)(backref + 1), len);
5551
5552         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5553             ref_namelen != backref_namelen ||
5554             strncmp(ref_name, backref_name, len)) {
5555                 err |= ROOT_REF_MISMATCH;
5556                 error("%s[%llu %llu] mismatch relative ref",
5557                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5558                       "ROOT_REF" : "ROOT_BACKREF",
5559                       ref_key->objectid, ref_key->offset);
5560         }
5561 out:
5562         btrfs_release_path(&path);
5563         return err;
5564 }
5565
5566 /*
5567  * Check all fs/file tree in low_memory mode.
5568  *
5569  * 1. for fs tree root item, call check_fs_root_v2()
5570  * 2. for fs tree root ref/backref, call check_root_ref()
5571  *
5572  * Return 0 if no error occurred.
5573  */
5574 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5575 {
5576         struct btrfs_root *tree_root = fs_info->tree_root;
5577         struct btrfs_root *cur_root = NULL;
5578         struct btrfs_path path;
5579         struct btrfs_key key;
5580         struct extent_buffer *node;
5581         unsigned int ext_ref;
5582         int slot;
5583         int ret;
5584         int err = 0;
5585
5586         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5587
5588         btrfs_init_path(&path);
5589         key.objectid = BTRFS_FS_TREE_OBJECTID;
5590         key.offset = 0;
5591         key.type = BTRFS_ROOT_ITEM_KEY;
5592
5593         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5594         if (ret < 0) {
5595                 err = ret;
5596                 goto out;
5597         } else if (ret > 0) {
5598                 err = -ENOENT;
5599                 goto out;
5600         }
5601
5602         while (1) {
5603                 node = path.nodes[0];
5604                 slot = path.slots[0];
5605                 btrfs_item_key_to_cpu(node, &key, slot);
5606                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5607                         goto out;
5608                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5609                     fs_root_objectid(key.objectid)) {
5610                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5611                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5612                                                                        &key);
5613                         } else {
5614                                 key.offset = (u64)-1;
5615                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5616                         }
5617
5618                         if (IS_ERR(cur_root)) {
5619                                 error("Fail to read fs/subvol tree: %lld",
5620                                       key.objectid);
5621                                 err = -EIO;
5622                                 goto next;
5623                         }
5624
5625                         ret = check_fs_root_v2(cur_root, ext_ref);
5626                         err |= ret;
5627
5628                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5629                                 btrfs_free_fs_root(cur_root);
5630                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5631                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5632                         ret = check_root_ref(tree_root, &key, node, slot);
5633                         err |= ret;
5634                 }
5635 next:
5636                 ret = btrfs_next_item(tree_root, &path);
5637                 if (ret > 0)
5638                         goto out;
5639                 if (ret < 0) {
5640                         err = ret;
5641                         goto out;
5642                 }
5643         }
5644
5645 out:
5646         btrfs_release_path(&path);
5647         return err;
5648 }
5649
5650 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5651                           struct cache_tree *root_cache)
5652 {
5653         int ret;
5654
5655         if (!ctx.progress_enabled)
5656                 fprintf(stderr, "checking fs roots\n");
5657         if (check_mode == CHECK_MODE_LOWMEM)
5658                 ret = check_fs_roots_v2(fs_info);
5659         else
5660                 ret = check_fs_roots(fs_info, root_cache);
5661
5662         return ret;
5663 }
5664
5665 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5666 {
5667         struct extent_backref *back, *tmp;
5668         struct tree_backref *tback;
5669         struct data_backref *dback;
5670         u64 found = 0;
5671         int err = 0;
5672
5673         rbtree_postorder_for_each_entry_safe(back, tmp,
5674                                              &rec->backref_tree, node) {
5675                 if (!back->found_extent_tree) {
5676                         err = 1;
5677                         if (!print_errs)
5678                                 goto out;
5679                         if (back->is_data) {
5680                                 dback = to_data_backref(back);
5681                                 fprintf(stderr, "Data backref %llu %s %llu"
5682                                         " owner %llu offset %llu num_refs %lu"
5683                                         " not found in extent tree\n",
5684                                         (unsigned long long)rec->start,
5685                                         back->full_backref ?
5686                                         "parent" : "root",
5687                                         back->full_backref ?
5688                                         (unsigned long long)dback->parent:
5689                                         (unsigned long long)dback->root,
5690                                         (unsigned long long)dback->owner,
5691                                         (unsigned long long)dback->offset,
5692                                         (unsigned long)dback->num_refs);
5693                         } else {
5694                                 tback = to_tree_backref(back);
5695                                 fprintf(stderr, "Tree backref %llu parent %llu"
5696                                         " root %llu not found in extent tree\n",
5697                                         (unsigned long long)rec->start,
5698                                         (unsigned long long)tback->parent,
5699                                         (unsigned long long)tback->root);
5700                         }
5701                 }
5702                 if (!back->is_data && !back->found_ref) {
5703                         err = 1;
5704                         if (!print_errs)
5705                                 goto out;
5706                         tback = to_tree_backref(back);
5707                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5708                                 (unsigned long long)rec->start,
5709                                 back->full_backref ? "parent" : "root",
5710                                 back->full_backref ?
5711                                 (unsigned long long)tback->parent :
5712                                 (unsigned long long)tback->root, back);
5713                 }
5714                 if (back->is_data) {
5715                         dback = to_data_backref(back);
5716                         if (dback->found_ref != dback->num_refs) {
5717                                 err = 1;
5718                                 if (!print_errs)
5719                                         goto out;
5720                                 fprintf(stderr, "Incorrect local backref count"
5721                                         " on %llu %s %llu owner %llu"
5722                                         " offset %llu found %u wanted %u back %p\n",
5723                                         (unsigned long long)rec->start,
5724                                         back->full_backref ?
5725                                         "parent" : "root",
5726                                         back->full_backref ?
5727                                         (unsigned long long)dback->parent:
5728                                         (unsigned long long)dback->root,
5729                                         (unsigned long long)dback->owner,
5730                                         (unsigned long long)dback->offset,
5731                                         dback->found_ref, dback->num_refs, back);
5732                         }
5733                         if (dback->disk_bytenr != rec->start) {
5734                                 err = 1;
5735                                 if (!print_errs)
5736                                         goto out;
5737                                 fprintf(stderr, "Backref disk bytenr does not"
5738                                         " match extent record, bytenr=%llu, "
5739                                         "ref bytenr=%llu\n",
5740                                         (unsigned long long)rec->start,
5741                                         (unsigned long long)dback->disk_bytenr);
5742                         }
5743
5744                         if (dback->bytes != rec->nr) {
5745                                 err = 1;
5746                                 if (!print_errs)
5747                                         goto out;
5748                                 fprintf(stderr, "Backref bytes do not match "
5749                                         "extent backref, bytenr=%llu, ref "
5750                                         "bytes=%llu, backref bytes=%llu\n",
5751                                         (unsigned long long)rec->start,
5752                                         (unsigned long long)rec->nr,
5753                                         (unsigned long long)dback->bytes);
5754                         }
5755                 }
5756                 if (!back->is_data) {
5757                         found += 1;
5758                 } else {
5759                         dback = to_data_backref(back);
5760                         found += dback->found_ref;
5761                 }
5762         }
5763         if (found != rec->refs) {
5764                 err = 1;
5765                 if (!print_errs)
5766                         goto out;
5767                 fprintf(stderr, "Incorrect global backref count "
5768                         "on %llu found %llu wanted %llu\n",
5769                         (unsigned long long)rec->start,
5770                         (unsigned long long)found,
5771                         (unsigned long long)rec->refs);
5772         }
5773 out:
5774         return err;
5775 }
5776
/*
 * rb_free_nodes() callback: free the extent backref that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
5783
5784 static void free_all_extent_backrefs(struct extent_record *rec)
5785 {
5786         rb_free_nodes(&rec->backref_tree, __free_one_backref);
5787 }
5788
5789 static void free_extent_record_cache(struct cache_tree *extent_cache)
5790 {
5791         struct cache_extent *cache;
5792         struct extent_record *rec;
5793
5794         while (1) {
5795                 cache = first_cache_extent(extent_cache);
5796                 if (!cache)
5797                         break;
5798                 rec = container_of(cache, struct extent_record, cache);
5799                 remove_cache_extent(extent_cache, cache);
5800                 free_all_extent_backrefs(rec);
5801                 free(rec);
5802         }
5803 }
5804
5805 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5806                                  struct extent_record *rec)
5807 {
5808         if (rec->content_checked && rec->owner_ref_checked &&
5809             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5810             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5811             !rec->bad_full_backref && !rec->crossing_stripes &&
5812             !rec->wrong_chunk_type) {
5813                 remove_cache_extent(extent_cache, &rec->cache);
5814                 free_all_extent_backrefs(rec);
5815                 list_del_init(&rec->list);
5816                 free(rec);
5817         }
5818         return 0;
5819 }
5820
5821 static int check_owner_ref(struct btrfs_root *root,
5822                             struct extent_record *rec,
5823                             struct extent_buffer *buf)
5824 {
5825         struct extent_backref *node, *tmp;
5826         struct tree_backref *back;
5827         struct btrfs_root *ref_root;
5828         struct btrfs_key key;
5829         struct btrfs_path path;
5830         struct extent_buffer *parent;
5831         int level;
5832         int found = 0;
5833         int ret;
5834
5835         rbtree_postorder_for_each_entry_safe(node, tmp,
5836                                              &rec->backref_tree, node) {
5837                 if (node->is_data)
5838                         continue;
5839                 if (!node->found_ref)
5840                         continue;
5841                 if (node->full_backref)
5842                         continue;
5843                 back = to_tree_backref(node);
5844                 if (btrfs_header_owner(buf) == back->root)
5845                         return 0;
5846         }
5847         BUG_ON(rec->is_root);
5848
5849         /* try to find the block by search corresponding fs tree */
5850         key.objectid = btrfs_header_owner(buf);
5851         key.type = BTRFS_ROOT_ITEM_KEY;
5852         key.offset = (u64)-1;
5853
5854         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5855         if (IS_ERR(ref_root))
5856                 return 1;
5857
5858         level = btrfs_header_level(buf);
5859         if (level == 0)
5860                 btrfs_item_key_to_cpu(buf, &key, 0);
5861         else
5862                 btrfs_node_key_to_cpu(buf, &key, 0);
5863
5864         btrfs_init_path(&path);
5865         path.lowest_level = level + 1;
5866         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5867         if (ret < 0)
5868                 return 0;
5869
5870         parent = path.nodes[level + 1];
5871         if (parent && buf->start == btrfs_node_blockptr(parent,
5872                                                         path.slots[level + 1]))
5873                 found = 1;
5874
5875         btrfs_release_path(&path);
5876         return found ? 0 : 1;
5877 }
5878
5879 static int is_extent_tree_record(struct extent_record *rec)
5880 {
5881         struct extent_backref *node, *tmp;
5882         struct tree_backref *back;
5883         int is_extent = 0;
5884
5885         rbtree_postorder_for_each_entry_safe(node, tmp,
5886                                              &rec->backref_tree, node) {
5887                 if (node->is_data)
5888                         return 0;
5889                 back = to_tree_backref(node);
5890                 if (node->full_backref)
5891                         return 0;
5892                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5893                         is_extent = 1;
5894         }
5895         return is_extent;
5896 }
5897
5898
5899 static int record_bad_block_io(struct btrfs_fs_info *info,
5900                                struct cache_tree *extent_cache,
5901                                u64 start, u64 len)
5902 {
5903         struct extent_record *rec;
5904         struct cache_extent *cache;
5905         struct btrfs_key key;
5906
5907         cache = lookup_cache_extent(extent_cache, start, len);
5908         if (!cache)
5909                 return 0;
5910
5911         rec = container_of(cache, struct extent_record, cache);
5912         if (!is_extent_tree_record(rec))
5913                 return 0;
5914
5915         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5916         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5917 }
5918
/*
 * Swap the entries at @slot and @slot + 1 of @buf.
 *
 * @root:       root passed on to the key fixup helpers.
 * @path:       path passed on to the key fixup helpers; for a leaf,
 *              path->slots[0] is overwritten and left at @slot + 1.
 * @buf:        the node or leaf to modify.
 * @slot:       the first of the two adjacent slots.
 *
 * For a node (non-zero header level) the two key pointers are exchanged and,
 * when @slot is 0, the new first key is propagated with
 * btrfs_fixup_low_keys().  For a leaf the item data, the item offsets/sizes
 * and the item keys are each exchanged.
 *
 * Return 0 on success, -ENOMEM if a temporary buffer cannot be allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* Read both key pointers, then write them back crossed over */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        /* The first key of the node changed, fix the levels above */
                        struct btrfs_disk_key key;
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                /* Temporary copies of both item payloads */
                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): item1_data was allocated with item1_size bytes
                 * but item2_size bytes are copied out of it here (and the
                 * other way round on the next line).  This looks like it
                 * reads past the temporary buffer whenever the two sizes
                 * differ -- confirm and fix together with the offset handling
                 * below.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /*
                 * NOTE(review): data, offsets/sizes and keys are all swapped;
                 * it is not obvious that each key still ends up with its own
                 * data afterwards -- verify against the intended leaf layout.
                 */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /* The key setter works on path->slots[0], so point it at each slot */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
5986
5987 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5988 {
5989         struct extent_buffer *buf;
5990         struct btrfs_key k1, k2;
5991         int i;
5992         int level = path->lowest_level;
5993         int ret = -EIO;
5994
5995         buf = path->nodes[level];
5996         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5997                 if (level) {
5998                         btrfs_node_key_to_cpu(buf, &k1, i);
5999                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6000                 } else {
6001                         btrfs_item_key_to_cpu(buf, &k1, i);
6002                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6003                 }
6004                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6005                         continue;
6006                 ret = swap_values(root, path, buf, i);
6007                 if (ret)
6008                         break;
6009                 btrfs_mark_buffer_dirty(buf);
6010                 i = 0;
6011         }
6012         return ret;
6013 }
6014
6015 static int delete_bogus_item(struct btrfs_root *root,
6016                              struct btrfs_path *path,
6017                              struct extent_buffer *buf, int slot)
6018 {
6019         struct btrfs_key key;
6020         int nritems = btrfs_header_nritems(buf);
6021
6022         btrfs_item_key_to_cpu(buf, &key, slot);
6023
6024         /* These are all the keys we can deal with missing. */
6025         if (key.type != BTRFS_DIR_INDEX_KEY &&
6026             key.type != BTRFS_EXTENT_ITEM_KEY &&
6027             key.type != BTRFS_METADATA_ITEM_KEY &&
6028             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6029             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6030                 return -1;
6031
6032         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6033                (unsigned long long)key.objectid, key.type,
6034                (unsigned long long)key.offset, slot, buf->start);
6035         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6036                               btrfs_item_nr_offset(slot + 1),
6037                               sizeof(struct btrfs_item) *
6038                               (nritems - slot - 1));
6039         btrfs_set_header_nritems(buf, nritems - 1);
6040         if (slot == 0) {
6041                 struct btrfs_disk_key disk_key;
6042
6043                 btrfs_item_key(buf, &disk_key, 0);
6044                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6045         }
6046         btrfs_mark_buffer_dirty(buf);
6047         return 0;
6048 }
6049
/*
 * Try to repair the data offsets of the items in the leaf at path->nodes[0].
 *
 * @root:       root of the tree, used for the leaf data size and passed on
 *              to delete_bogus_item().
 * @path:       path whose lowest level must be 0 (a leaf).
 *
 * Item 0 is expected to end at BTRFS_LEAF_DATA_SIZE(root) and every later item
 * is expected to end where the data of the previous item starts.  An item
 * that ends below the expected position is moved up by the difference.  An
 * item that ends above it is handed to delete_bogus_item(); if that succeeds
 * the scan restarts from the first item, otherwise the repair is abandoned
 * with -EIO.
 *
 * Return 0 on success.  A failure does not return: it trips the BUG_ON() at
 * the end of the function.
 */
static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
{
        struct extent_buffer *buf;
        int i;
        int ret = 0;

        /* We should only get this for leaves */
        BUG_ON(path->lowest_level);
        buf = path->nodes[0];
again:
        for (i = 0; i < btrfs_header_nritems(buf); i++) {
                /* shift: number of bytes item i has to move up, 0 if in place */
                unsigned int shift = 0, offset;

                if (i == 0 && btrfs_item_end_nr(buf, i) !=
                    BTRFS_LEAF_DATA_SIZE(root)) {
                        /* First item: its end must match the leaf data size */
                        if (btrfs_item_end_nr(buf, i) >
                            BTRFS_LEAF_DATA_SIZE(root)) {
                                ret = delete_bogus_item(root, path, buf, i);
                                /* Item dropped, rescan the changed leaf */
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "item is off the end of the "
                                        "leaf, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        shift = BTRFS_LEAF_DATA_SIZE(root) -
                                btrfs_item_end_nr(buf, i);
                } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
                           btrfs_item_offset_nr(buf, i - 1)) {
                        /* Later items: end must match the previous item's offset */
                        if (btrfs_item_end_nr(buf, i) >
                            btrfs_item_offset_nr(buf, i - 1)) {
                                ret = delete_bogus_item(root, path, buf, i);
                                /* Item dropped, rescan the changed leaf */
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "items overlap, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        shift = btrfs_item_offset_nr(buf, i - 1) -
                                btrfs_item_end_nr(buf, i);
                }
                if (!shift)
                        continue;

                printf("Shifting item nr %d by %u bytes in block %llu\n",
                       i, shift, (unsigned long long)buf->start);
                /* Move the item data up by shift bytes and record the new offset */
                offset = btrfs_item_offset_nr(buf, i);
                memmove_extent_buffer(buf,
                                      btrfs_leaf_data(buf) + offset + shift,
                                      btrfs_leaf_data(buf) + offset,
                                      btrfs_item_size_nr(buf, i));
                btrfs_set_item_offset(buf, btrfs_item_nr(i),
                                      offset + shift);
                btrfs_mark_buffer_dirty(buf);
        }

        /*
         * We may have moved things, in which case we want to exit so we don't
         * write those changes out.  Once we have proper abort functionality in
         * progs this can be changed to something nicer.
         */
        BUG_ON(ret);
        return ret;
}
6114
6115 /*
6116  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6117  * then just return -EIO.
6118  */
6119 static int try_to_fix_bad_block(struct btrfs_root *root,
6120                                 struct extent_buffer *buf,
6121                                 enum btrfs_tree_block_status status)
6122 {
6123         struct btrfs_trans_handle *trans;
6124         struct ulist *roots;
6125         struct ulist_node *node;
6126         struct btrfs_root *search_root;
6127         struct btrfs_path path;
6128         struct ulist_iterator iter;
6129         struct btrfs_key root_key, key;
6130         int ret;
6131
6132         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6133             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6134                 return -EIO;
6135
6136         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6137         if (ret)
6138                 return -EIO;
6139
6140         btrfs_init_path(&path);
6141         ULIST_ITER_INIT(&iter);
6142         while ((node = ulist_next(roots, &iter))) {
6143                 root_key.objectid = node->val;
6144                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6145                 root_key.offset = (u64)-1;
6146
6147                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6148                 if (IS_ERR(root)) {
6149                         ret = -EIO;
6150                         break;
6151                 }
6152
6153
6154                 trans = btrfs_start_transaction(search_root, 0);
6155                 if (IS_ERR(trans)) {
6156                         ret = PTR_ERR(trans);
6157                         break;
6158                 }
6159
6160                 path.lowest_level = btrfs_header_level(buf);
6161                 path.skip_check_block = 1;
6162                 if (path.lowest_level)
6163                         btrfs_node_key_to_cpu(buf, &key, 0);
6164                 else
6165                         btrfs_item_key_to_cpu(buf, &key, 0);
6166                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6167                 if (ret) {
6168                         ret = -EIO;
6169                         btrfs_commit_transaction(trans, search_root);
6170                         break;
6171                 }
6172                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6173                         ret = fix_key_order(search_root, &path);
6174                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6175                         ret = fix_item_offset(search_root, &path);
6176                 if (ret) {
6177                         btrfs_commit_transaction(trans, search_root);
6178                         break;
6179                 }
6180                 btrfs_release_path(&path);
6181                 btrfs_commit_transaction(trans, search_root);
6182         }
6183         ulist_free(roots);
6184         btrfs_release_path(&path);
6185         return ret;
6186 }
6187
6188 static int check_block(struct btrfs_root *root,
6189                        struct cache_tree *extent_cache,
6190                        struct extent_buffer *buf, u64 flags)
6191 {
6192         struct extent_record *rec;
6193         struct cache_extent *cache;
6194         struct btrfs_key key;
6195         enum btrfs_tree_block_status status;
6196         int ret = 0;
6197         int level;
6198
6199         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6200         if (!cache)
6201                 return 1;
6202         rec = container_of(cache, struct extent_record, cache);
6203         rec->generation = btrfs_header_generation(buf);
6204
6205         level = btrfs_header_level(buf);
6206         if (btrfs_header_nritems(buf) > 0) {
6207
6208                 if (level == 0)
6209                         btrfs_item_key_to_cpu(buf, &key, 0);
6210                 else
6211                         btrfs_node_key_to_cpu(buf, &key, 0);
6212
6213                 rec->info_objectid = key.objectid;
6214         }
6215         rec->info_level = level;
6216
6217         if (btrfs_is_leaf(buf))
6218                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6219         else
6220                 status = btrfs_check_node(root, &rec->parent_key, buf);
6221
6222         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6223                 if (repair)
6224                         status = try_to_fix_bad_block(root, buf, status);
6225                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6226                         ret = -EIO;
6227                         fprintf(stderr, "bad block %llu\n",
6228                                 (unsigned long long)buf->start);
6229                 } else {
6230                         /*
6231                          * Signal to callers we need to start the scan over
6232                          * again since we'll have cowed blocks.
6233                          */
6234                         ret = -EAGAIN;
6235                 }
6236         } else {
6237                 rec->content_checked = 1;
6238                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6239                         rec->owner_ref_checked = 1;
6240                 else {
6241                         ret = check_owner_ref(root, rec, buf);
6242                         if (!ret)
6243                                 rec->owner_ref_checked = 1;
6244                 }
6245         }
6246         if (!ret)
6247                 maybe_free_extent_rec(extent_cache, rec);
6248         return ret;
6249 }
6250
#if 0
/*
 * Disabled list-based lookup of a tree backref on @rec.
 *
 * With a non-zero @parent only full backrefs are considered and matched on
 * their parent; otherwise only non-full backrefs are considered and matched
 * on @root.  Data backrefs are always skipped.
 *
 * Returns the matching backref or NULL.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;

		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else {
			if (!node->full_backref && back->root == root)
				return back;
		}
	}
	return NULL;
}
#endif
6280
6281 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6282                                                 u64 parent, u64 root)
6283 {
6284         struct tree_backref *ref = malloc(sizeof(*ref));
6285
6286         if (!ref)
6287                 return NULL;
6288         memset(&ref->node, 0, sizeof(ref->node));
6289         if (parent > 0) {
6290                 ref->parent = parent;
6291                 ref->node.full_backref = 1;
6292         } else {
6293                 ref->root = root;
6294                 ref->node.full_backref = 0;
6295         }
6296
6297         return ref;
6298 }
6299
#if 0
/*
 * Disabled list-based lookup of a data backref on @rec.
 *
 * With a non-zero @parent only full backrefs are considered and matched on
 * their parent.  Otherwise non-full backrefs are matched on
 * (@root, @owner, @offset); when both the caller and the candidate have
 * already found a ref, the candidate must additionally agree on @bytes and
 * @disk_bytenr.
 *
 * Returns the matching backref or NULL.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;

		back = to_data_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		     back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
6338
6339 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6340                                                 u64 parent, u64 root,
6341                                                 u64 owner, u64 offset,
6342                                                 u64 max_size)
6343 {
6344         struct data_backref *ref = malloc(sizeof(*ref));
6345
6346         if (!ref)
6347                 return NULL;
6348         memset(&ref->node, 0, sizeof(ref->node));
6349         ref->node.is_data = 1;
6350
6351         if (parent > 0) {
6352                 ref->parent = parent;
6353                 ref->owner = 0;
6354                 ref->offset = 0;
6355                 ref->node.full_backref = 1;
6356         } else {
6357                 ref->root = root;
6358                 ref->owner = owner;
6359                 ref->offset = offset;
6360                 ref->node.full_backref = 0;
6361         }
6362         ref->bytes = max_size;
6363         ref->found_ref = 0;
6364         ref->num_refs = 0;
6365         if (max_size > rec->max_size)
6366                 rec->max_size = max_size;
6367         return ref;
6368 }
6369
6370 /* Check if the type of extent matches with its chunk */
6371 static void check_extent_type(struct extent_record *rec)
6372 {
6373         struct btrfs_block_group_cache *bg_cache;
6374
6375         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6376         if (!bg_cache)
6377                 return;
6378
6379         /* data extent, check chunk directly*/
6380         if (!rec->metadata) {
6381                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6382                         rec->wrong_chunk_type = 1;
6383                 return;
6384         }
6385
6386         /* metadata extent, check the obvious case first */
6387         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6388                                  BTRFS_BLOCK_GROUP_METADATA))) {
6389                 rec->wrong_chunk_type = 1;
6390                 return;
6391         }
6392
6393         /*
6394          * Check SYSTEM extent, as it's also marked as metadata, we can only
6395          * make sure it's a SYSTEM extent by its backref
6396          */
6397         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6398                 struct extent_backref *node;
6399                 struct tree_backref *tback;
6400                 u64 bg_type;
6401
6402                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6403                 if (node->is_data) {
6404                         /* tree block shouldn't have data backref */
6405                         rec->wrong_chunk_type = 1;
6406                         return;
6407                 }
6408                 tback = container_of(node, struct tree_backref, node);
6409
6410                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6411                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6412                 else
6413                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6414                 if (!(bg_cache->flags & bg_type))
6415                         rec->wrong_chunk_type = 1;
6416         }
6417 }
6418
6419 /*
6420  * Allocate a new extent record, fill default values from @tmpl and insert int
6421  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6422  * the cache, otherwise it fails.
6423  */
6424 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6425                 struct extent_record *tmpl)
6426 {
6427         struct extent_record *rec;
6428         int ret = 0;
6429
6430         BUG_ON(tmpl->max_size == 0);
6431         rec = malloc(sizeof(*rec));
6432         if (!rec)
6433                 return -ENOMEM;
6434         rec->start = tmpl->start;
6435         rec->max_size = tmpl->max_size;
6436         rec->nr = max(tmpl->nr, tmpl->max_size);
6437         rec->found_rec = tmpl->found_rec;
6438         rec->content_checked = tmpl->content_checked;
6439         rec->owner_ref_checked = tmpl->owner_ref_checked;
6440         rec->num_duplicates = 0;
6441         rec->metadata = tmpl->metadata;
6442         rec->flag_block_full_backref = FLAG_UNSET;
6443         rec->bad_full_backref = 0;
6444         rec->crossing_stripes = 0;
6445         rec->wrong_chunk_type = 0;
6446         rec->is_root = tmpl->is_root;
6447         rec->refs = tmpl->refs;
6448         rec->extent_item_refs = tmpl->extent_item_refs;
6449         rec->parent_generation = tmpl->parent_generation;
6450         INIT_LIST_HEAD(&rec->backrefs);
6451         INIT_LIST_HEAD(&rec->dups);
6452         INIT_LIST_HEAD(&rec->list);
6453         rec->backref_tree = RB_ROOT;
6454         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6455         rec->cache.start = tmpl->start;
6456         rec->cache.size = tmpl->nr;
6457         ret = insert_cache_extent(extent_cache, &rec->cache);
6458         if (ret) {
6459                 free(rec);
6460                 return ret;
6461         }
6462         bytes_used += rec->nr;
6463
6464         if (tmpl->metadata)
6465                 rec->crossing_stripes = check_crossing_stripes(global_info,
6466                                 rec->start, global_info->nodesize);
6467         check_extent_type(rec);
6468         return ret;
6469 }
6470
6471 /*
6472  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6473  * some are hints:
6474  * - refs              - if found, increase refs
6475  * - is_root           - if found, set
6476  * - content_checked   - if found, set
6477  * - owner_ref_checked - if found, set
6478  *
6479  * If not found, create a new one, initialize and insert.
6480  */
6481 static int add_extent_rec(struct cache_tree *extent_cache,
6482                 struct extent_record *tmpl)
6483 {
6484         struct extent_record *rec;
6485         struct cache_extent *cache;
6486         int ret = 0;
6487         int dup = 0;
6488
6489         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6490         if (cache) {
6491                 rec = container_of(cache, struct extent_record, cache);
6492                 if (tmpl->refs)
6493                         rec->refs++;
6494                 if (rec->nr == 1)
6495                         rec->nr = max(tmpl->nr, tmpl->max_size);
6496
6497                 /*
6498                  * We need to make sure to reset nr to whatever the extent
6499                  * record says was the real size, this way we can compare it to
6500                  * the backrefs.
6501                  */
6502                 if (tmpl->found_rec) {
6503                         if (tmpl->start != rec->start || rec->found_rec) {
6504                                 struct extent_record *tmp;
6505
6506                                 dup = 1;
6507                                 if (list_empty(&rec->list))
6508                                         list_add_tail(&rec->list,
6509                                                       &duplicate_extents);
6510
6511                                 /*
6512                                  * We have to do this song and dance in case we
6513                                  * find an extent record that falls inside of
6514                                  * our current extent record but does not have
6515                                  * the same objectid.
6516                                  */
6517                                 tmp = malloc(sizeof(*tmp));
6518                                 if (!tmp)
6519                                         return -ENOMEM;
6520                                 tmp->start = tmpl->start;
6521                                 tmp->max_size = tmpl->max_size;
6522                                 tmp->nr = tmpl->nr;
6523                                 tmp->found_rec = 1;
6524                                 tmp->metadata = tmpl->metadata;
6525                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6526                                 INIT_LIST_HEAD(&tmp->list);
6527                                 list_add_tail(&tmp->list, &rec->dups);
6528                                 rec->num_duplicates++;
6529                         } else {
6530                                 rec->nr = tmpl->nr;
6531                                 rec->found_rec = 1;
6532                         }
6533                 }
6534
6535                 if (tmpl->extent_item_refs && !dup) {
6536                         if (rec->extent_item_refs) {
6537                                 fprintf(stderr, "block %llu rec "
6538                                         "extent_item_refs %llu, passed %llu\n",
6539                                         (unsigned long long)tmpl->start,
6540                                         (unsigned long long)
6541                                                         rec->extent_item_refs,
6542                                         (unsigned long long)tmpl->extent_item_refs);
6543                         }
6544                         rec->extent_item_refs = tmpl->extent_item_refs;
6545                 }
6546                 if (tmpl->is_root)
6547                         rec->is_root = 1;
6548                 if (tmpl->content_checked)
6549                         rec->content_checked = 1;
6550                 if (tmpl->owner_ref_checked)
6551                         rec->owner_ref_checked = 1;
6552                 memcpy(&rec->parent_key, &tmpl->parent_key,
6553                                 sizeof(tmpl->parent_key));
6554                 if (tmpl->parent_generation)
6555                         rec->parent_generation = tmpl->parent_generation;
6556                 if (rec->max_size < tmpl->max_size)
6557                         rec->max_size = tmpl->max_size;
6558
6559                 /*
6560                  * A metadata extent can't cross stripe_len boundary, otherwise
6561                  * kernel scrub won't be able to handle it.
6562                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6563                  * it.
6564                  */
6565                 if (tmpl->metadata)
6566                         rec->crossing_stripes = check_crossing_stripes(
6567                                         global_info, rec->start,
6568                                         global_info->nodesize);
6569                 check_extent_type(rec);
6570                 maybe_free_extent_rec(extent_cache, rec);
6571                 return ret;
6572         }
6573
6574         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6575
6576         return ret;
6577 }
6578
6579 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6580                             u64 parent, u64 root, int found_ref)
6581 {
6582         struct extent_record *rec;
6583         struct tree_backref *back;
6584         struct cache_extent *cache;
6585         int ret;
6586         bool insert = false;
6587
6588         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6589         if (!cache) {
6590                 struct extent_record tmpl;
6591
6592                 memset(&tmpl, 0, sizeof(tmpl));
6593                 tmpl.start = bytenr;
6594                 tmpl.nr = 1;
6595                 tmpl.metadata = 1;
6596                 tmpl.max_size = 1;
6597
6598                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6599                 if (ret)
6600                         return ret;
6601
6602                 /* really a bug in cache_extent implement now */
6603                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6604                 if (!cache)
6605                         return -ENOENT;
6606         }
6607
6608         rec = container_of(cache, struct extent_record, cache);
6609         if (rec->start != bytenr) {
6610                 /*
6611                  * Several cause, from unaligned bytenr to over lapping extents
6612                  */
6613                 return -EEXIST;
6614         }
6615
6616         back = find_tree_backref(rec, parent, root);
6617         if (!back) {
6618                 back = alloc_tree_backref(rec, parent, root);
6619                 if (!back)
6620                         return -ENOMEM;
6621                 insert = true;
6622         }
6623
6624         if (found_ref) {
6625                 if (back->node.found_ref) {
6626                         fprintf(stderr, "Extent back ref already exists "
6627                                 "for %llu parent %llu root %llu \n",
6628                                 (unsigned long long)bytenr,
6629                                 (unsigned long long)parent,
6630                                 (unsigned long long)root);
6631                 }
6632                 back->node.found_ref = 1;
6633         } else {
6634                 if (back->node.found_extent_tree) {
6635                         fprintf(stderr, "Extent back ref already exists "
6636                                 "for %llu parent %llu root %llu \n",
6637                                 (unsigned long long)bytenr,
6638                                 (unsigned long long)parent,
6639                                 (unsigned long long)root);
6640                 }
6641                 back->node.found_extent_tree = 1;
6642         }
6643         if (insert)
6644                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6645                         compare_extent_backref));
6646         check_extent_type(rec);
6647         maybe_free_extent_rec(extent_cache, rec);
6648         return 0;
6649 }
6650
6651 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6652                             u64 parent, u64 root, u64 owner, u64 offset,
6653                             u32 num_refs, int found_ref, u64 max_size)
6654 {
6655         struct extent_record *rec;
6656         struct data_backref *back;
6657         struct cache_extent *cache;
6658         int ret;
6659         bool insert = false;
6660
6661         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6662         if (!cache) {
6663                 struct extent_record tmpl;
6664
6665                 memset(&tmpl, 0, sizeof(tmpl));
6666                 tmpl.start = bytenr;
6667                 tmpl.nr = 1;
6668                 tmpl.max_size = max_size;
6669
6670                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6671                 if (ret)
6672                         return ret;
6673
6674                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6675                 if (!cache)
6676                         abort();
6677         }
6678
6679         rec = container_of(cache, struct extent_record, cache);
6680         if (rec->max_size < max_size)
6681                 rec->max_size = max_size;
6682
6683         /*
6684          * If found_ref is set then max_size is the real size and must match the
6685          * existing refs.  So if we have already found a ref then we need to
6686          * make sure that this ref matches the existing one, otherwise we need
6687          * to add a new backref so we can notice that the backrefs don't match
6688          * and we need to figure out who is telling the truth.  This is to
6689          * account for that awful fsync bug I introduced where we'd end up with
6690          * a btrfs_file_extent_item that would have its length include multiple
6691          * prealloc extents or point inside of a prealloc extent.
6692          */
6693         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6694                                  bytenr, max_size);
6695         if (!back) {
6696                 back = alloc_data_backref(rec, parent, root, owner, offset,
6697                                           max_size);
6698                 BUG_ON(!back);
6699                 insert = true;
6700         }
6701
6702         if (found_ref) {
6703                 BUG_ON(num_refs != 1);
6704                 if (back->node.found_ref)
6705                         BUG_ON(back->bytes != max_size);
6706                 back->node.found_ref = 1;
6707                 back->found_ref += 1;
6708                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6709                         back->bytes = max_size;
6710                         back->disk_bytenr = bytenr;
6711
6712                         /* Need to reinsert if not already in the tree */
6713                         if (!insert) {
6714                                 rb_erase(&back->node.node, &rec->backref_tree);
6715                                 insert = true;
6716                         }
6717                 }
6718                 rec->refs += 1;
6719                 rec->content_checked = 1;
6720                 rec->owner_ref_checked = 1;
6721         } else {
6722                 if (back->node.found_extent_tree) {
6723                         fprintf(stderr, "Extent back ref already exists "
6724                                 "for %llu parent %llu root %llu "
6725                                 "owner %llu offset %llu num_refs %lu\n",
6726                                 (unsigned long long)bytenr,
6727                                 (unsigned long long)parent,
6728                                 (unsigned long long)root,
6729                                 (unsigned long long)owner,
6730                                 (unsigned long long)offset,
6731                                 (unsigned long)num_refs);
6732                 }
6733                 back->num_refs = num_refs;
6734                 back->node.found_extent_tree = 1;
6735         }
6736         if (insert)
6737                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6738                         compare_extent_backref));
6739
6740         maybe_free_extent_rec(extent_cache, rec);
6741         return 0;
6742 }
6743
6744 static int add_pending(struct cache_tree *pending,
6745                        struct cache_tree *seen, u64 bytenr, u32 size)
6746 {
6747         int ret;
6748         ret = add_cache_extent(seen, bytenr, size);
6749         if (ret)
6750                 return ret;
6751         add_cache_extent(pending, bytenr, size);
6752         return 0;
6753 }
6754
6755 static int pick_next_pending(struct cache_tree *pending,
6756                         struct cache_tree *reada,
6757                         struct cache_tree *nodes,
6758                         u64 last, struct block_info *bits, int bits_nr,
6759                         int *reada_bits)
6760 {
6761         unsigned long node_start = last;
6762         struct cache_extent *cache;
6763         int ret;
6764
6765         cache = search_cache_extent(reada, 0);
6766         if (cache) {
6767                 bits[0].start = cache->start;
6768                 bits[0].size = cache->size;
6769                 *reada_bits = 1;
6770                 return 1;
6771         }
6772         *reada_bits = 0;
6773         if (node_start > 32768)
6774                 node_start -= 32768;
6775
6776         cache = search_cache_extent(nodes, node_start);
6777         if (!cache)
6778                 cache = search_cache_extent(nodes, 0);
6779
6780         if (!cache) {
6781                  cache = search_cache_extent(pending, 0);
6782                  if (!cache)
6783                          return 0;
6784                  ret = 0;
6785                  do {
6786                          bits[ret].start = cache->start;
6787                          bits[ret].size = cache->size;
6788                          cache = next_cache_extent(cache);
6789                          ret++;
6790                  } while (cache && ret < bits_nr);
6791                  return ret;
6792         }
6793
6794         ret = 0;
6795         do {
6796                 bits[ret].start = cache->start;
6797                 bits[ret].size = cache->size;
6798                 cache = next_cache_extent(cache);
6799                 ret++;
6800         } while (cache && ret < bits_nr);
6801
6802         if (bits_nr - ret > 8) {
6803                 u64 lookup = bits[0].start + bits[0].size;
6804                 struct cache_extent *next;
6805                 next = search_cache_extent(pending, lookup);
6806                 while(next) {
6807                         if (next->start - lookup > 32768)
6808                                 break;
6809                         bits[ret].start = next->start;
6810                         bits[ret].size = next->size;
6811                         lookup = next->start + next->size;
6812                         ret++;
6813                         if (ret == bits_nr)
6814                                 break;
6815                         next = next_cache_extent(next);
6816                         if (!next)
6817                                 break;
6818                 }
6819         }
6820         return ret;
6821 }
6822
6823 static void free_chunk_record(struct cache_extent *cache)
6824 {
6825         struct chunk_record *rec;
6826
6827         rec = container_of(cache, struct chunk_record, cache);
6828         list_del_init(&rec->list);
6829         list_del_init(&rec->dextents);
6830         free(rec);
6831 }
6832
6833 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6834 {
6835         cache_tree_free_extents(chunk_cache, free_chunk_record);
6836 }
6837
6838 static void free_device_record(struct rb_node *node)
6839 {
6840         struct device_record *rec;
6841
6842         rec = container_of(node, struct device_record, node);
6843         free(rec);
6844 }
6845
6846 FREE_RB_BASED_TREE(device_cache, free_device_record);
6847
6848 int insert_block_group_record(struct block_group_tree *tree,
6849                               struct block_group_record *bg_rec)
6850 {
6851         int ret;
6852
6853         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6854         if (ret)
6855                 return ret;
6856
6857         list_add_tail(&bg_rec->list, &tree->block_groups);
6858         return 0;
6859 }
6860
6861 static void free_block_group_record(struct cache_extent *cache)
6862 {
6863         struct block_group_record *rec;
6864
6865         rec = container_of(cache, struct block_group_record, cache);
6866         list_del_init(&rec->list);
6867         free(rec);
6868 }
6869
6870 void free_block_group_tree(struct block_group_tree *tree)
6871 {
6872         cache_tree_free_extents(&tree->tree, free_block_group_record);
6873 }
6874
6875 int insert_device_extent_record(struct device_extent_tree *tree,
6876                                 struct device_extent_record *de_rec)
6877 {
6878         int ret;
6879
6880         /*
6881          * Device extent is a bit different from the other extents, because
6882          * the extents which belong to the different devices may have the
6883          * same start and size, so we need use the special extent cache
6884          * search/insert functions.
6885          */
6886         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6887         if (ret)
6888                 return ret;
6889
6890         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6891         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6892         return 0;
6893 }
6894
6895 static void free_device_extent_record(struct cache_extent *cache)
6896 {
6897         struct device_extent_record *rec;
6898
6899         rec = container_of(cache, struct device_extent_record, cache);
6900         if (!list_empty(&rec->chunk_list))
6901                 list_del_init(&rec->chunk_list);
6902         if (!list_empty(&rec->device_list))
6903                 list_del_init(&rec->device_list);
6904         free(rec);
6905 }
6906
6907 void free_device_extent_tree(struct device_extent_tree *tree)
6908 {
6909         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6910 }
6911
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the backref described by a v0 extent ref item at @slot of @leaf.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref; anything else is recorded as a data backref carrying the
 * ref count stored in the item.
 *
 * Returns whatever the backref helper returned.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6932
6933 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6934                                             struct btrfs_key *key,
6935                                             int slot)
6936 {
6937         struct btrfs_chunk *ptr;
6938         struct chunk_record *rec;
6939         int num_stripes, i;
6940
6941         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6942         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6943
6944         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6945         if (!rec) {
6946                 fprintf(stderr, "memory allocation failed\n");
6947                 exit(-1);
6948         }
6949
6950         INIT_LIST_HEAD(&rec->list);
6951         INIT_LIST_HEAD(&rec->dextents);
6952         rec->bg_rec = NULL;
6953
6954         rec->cache.start = key->offset;
6955         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6956
6957         rec->generation = btrfs_header_generation(leaf);
6958
6959         rec->objectid = key->objectid;
6960         rec->type = key->type;
6961         rec->offset = key->offset;
6962
6963         rec->length = rec->cache.size;
6964         rec->owner = btrfs_chunk_owner(leaf, ptr);
6965         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6966         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6967         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6968         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6969         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6970         rec->num_stripes = num_stripes;
6971         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6972
6973         for (i = 0; i < rec->num_stripes; ++i) {
6974                 rec->stripes[i].devid =
6975                         btrfs_stripe_devid_nr(leaf, ptr, i);
6976                 rec->stripes[i].offset =
6977                         btrfs_stripe_offset_nr(leaf, ptr, i);
6978                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6979                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6980                                 BTRFS_UUID_SIZE);
6981         }
6982
6983         return rec;
6984 }
6985
6986 static int process_chunk_item(struct cache_tree *chunk_cache,
6987                               struct btrfs_key *key, struct extent_buffer *eb,
6988                               int slot)
6989 {
6990         struct chunk_record *rec;
6991         struct btrfs_chunk *chunk;
6992         int ret = 0;
6993
6994         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6995         /*
6996          * Do extra check for this chunk item,
6997          *
6998          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6999          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7000          * and owner<->key_type check.
7001          */
7002         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7003                                       key->offset);
7004         if (ret < 0) {
7005                 error("chunk(%llu, %llu) is not valid, ignore it",
7006                       key->offset, btrfs_chunk_length(eb, chunk));
7007                 return 0;
7008         }
7009         rec = btrfs_new_chunk_record(eb, key, slot);
7010         ret = insert_cache_extent(chunk_cache, &rec->cache);
7011         if (ret) {
7012                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7013                         rec->offset, rec->length);
7014                 free(rec);
7015         }
7016
7017         return ret;
7018 }
7019
7020 static int process_device_item(struct rb_root *dev_cache,
7021                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7022 {
7023         struct btrfs_dev_item *ptr;
7024         struct device_record *rec;
7025         int ret = 0;
7026
7027         ptr = btrfs_item_ptr(eb,
7028                 slot, struct btrfs_dev_item);
7029
7030         rec = malloc(sizeof(*rec));
7031         if (!rec) {
7032                 fprintf(stderr, "memory allocation failed\n");
7033                 return -ENOMEM;
7034         }
7035
7036         rec->devid = key->offset;
7037         rec->generation = btrfs_header_generation(eb);
7038
7039         rec->objectid = key->objectid;
7040         rec->type = key->type;
7041         rec->offset = key->offset;
7042
7043         rec->devid = btrfs_device_id(eb, ptr);
7044         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7045         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7046
7047         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7048         if (ret) {
7049                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7050                 free(rec);
7051         }
7052
7053         return ret;
7054 }
7055
7056 struct block_group_record *
7057 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7058                              int slot)
7059 {
7060         struct btrfs_block_group_item *ptr;
7061         struct block_group_record *rec;
7062
7063         rec = calloc(1, sizeof(*rec));
7064         if (!rec) {
7065                 fprintf(stderr, "memory allocation failed\n");
7066                 exit(-1);
7067         }
7068
7069         rec->cache.start = key->objectid;
7070         rec->cache.size = key->offset;
7071
7072         rec->generation = btrfs_header_generation(leaf);
7073
7074         rec->objectid = key->objectid;
7075         rec->type = key->type;
7076         rec->offset = key->offset;
7077
7078         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7079         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7080
7081         INIT_LIST_HEAD(&rec->list);
7082
7083         return rec;
7084 }
7085
7086 static int process_block_group_item(struct block_group_tree *block_group_cache,
7087                                     struct btrfs_key *key,
7088                                     struct extent_buffer *eb, int slot)
7089 {
7090         struct block_group_record *rec;
7091         int ret = 0;
7092
7093         rec = btrfs_new_block_group_record(eb, key, slot);
7094         ret = insert_block_group_record(block_group_cache, rec);
7095         if (ret) {
7096                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7097                         rec->objectid, rec->offset);
7098                 free(rec);
7099         }
7100
7101         return ret;
7102 }
7103
7104 struct device_extent_record *
7105 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7106                                struct btrfs_key *key, int slot)
7107 {
7108         struct device_extent_record *rec;
7109         struct btrfs_dev_extent *ptr;
7110
7111         rec = calloc(1, sizeof(*rec));
7112         if (!rec) {
7113                 fprintf(stderr, "memory allocation failed\n");
7114                 exit(-1);
7115         }
7116
7117         rec->cache.objectid = key->objectid;
7118         rec->cache.start = key->offset;
7119
7120         rec->generation = btrfs_header_generation(leaf);
7121
7122         rec->objectid = key->objectid;
7123         rec->type = key->type;
7124         rec->offset = key->offset;
7125
7126         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7127         rec->chunk_objecteid =
7128                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7129         rec->chunk_offset =
7130                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7131         rec->length = btrfs_dev_extent_length(leaf, ptr);
7132         rec->cache.size = rec->length;
7133
7134         INIT_LIST_HEAD(&rec->chunk_list);
7135         INIT_LIST_HEAD(&rec->device_list);
7136
7137         return rec;
7138 }
7139
7140 static int
7141 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7142                            struct btrfs_key *key, struct extent_buffer *eb,
7143                            int slot)
7144 {
7145         struct device_extent_record *rec;
7146         int ret;
7147
7148         rec = btrfs_new_device_extent_record(eb, key, slot);
7149         ret = insert_device_extent_record(dev_extent_cache, rec);
7150         if (ret) {
7151                 fprintf(stderr,
7152                         "Device extent[%llu, %llu, %llu] existed.\n",
7153                         rec->objectid, rec->offset, rec->length);
7154                 free(rec);
7155         }
7156
7157         return ret;
7158 }
7159
7160 static int process_extent_item(struct btrfs_root *root,
7161                                struct cache_tree *extent_cache,
7162                                struct extent_buffer *eb, int slot)
7163 {
7164         struct btrfs_extent_item *ei;
7165         struct btrfs_extent_inline_ref *iref;
7166         struct btrfs_extent_data_ref *dref;
7167         struct btrfs_shared_data_ref *sref;
7168         struct btrfs_key key;
7169         struct extent_record tmpl;
7170         unsigned long end;
7171         unsigned long ptr;
7172         int ret;
7173         int type;
7174         u32 item_size = btrfs_item_size_nr(eb, slot);
7175         u64 refs = 0;
7176         u64 offset;
7177         u64 num_bytes;
7178         int metadata = 0;
7179
7180         btrfs_item_key_to_cpu(eb, &key, slot);
7181
7182         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7183                 metadata = 1;
7184                 num_bytes = root->fs_info->nodesize;
7185         } else {
7186                 num_bytes = key.offset;
7187         }
7188
7189         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7190                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7191                       key.objectid, root->fs_info->sectorsize);
7192                 return -EIO;
7193         }
7194         if (item_size < sizeof(*ei)) {
7195 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7196                 struct btrfs_extent_item_v0 *ei0;
7197                 BUG_ON(item_size != sizeof(*ei0));
7198                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7199                 refs = btrfs_extent_refs_v0(eb, ei0);
7200 #else
7201                 BUG();
7202 #endif
7203                 memset(&tmpl, 0, sizeof(tmpl));
7204                 tmpl.start = key.objectid;
7205                 tmpl.nr = num_bytes;
7206                 tmpl.extent_item_refs = refs;
7207                 tmpl.metadata = metadata;
7208                 tmpl.found_rec = 1;
7209                 tmpl.max_size = num_bytes;
7210
7211                 return add_extent_rec(extent_cache, &tmpl);
7212         }
7213
7214         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7215         refs = btrfs_extent_refs(eb, ei);
7216         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7217                 metadata = 1;
7218         else
7219                 metadata = 0;
7220         if (metadata && num_bytes != root->fs_info->nodesize) {
7221                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7222                       num_bytes, root->fs_info->nodesize);
7223                 return -EIO;
7224         }
7225         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7226                 error("ignore invalid data extent, length %llu is not aligned to %u",
7227                       num_bytes, root->fs_info->sectorsize);
7228                 return -EIO;
7229         }
7230
7231         memset(&tmpl, 0, sizeof(tmpl));
7232         tmpl.start = key.objectid;
7233         tmpl.nr = num_bytes;
7234         tmpl.extent_item_refs = refs;
7235         tmpl.metadata = metadata;
7236         tmpl.found_rec = 1;
7237         tmpl.max_size = num_bytes;
7238         add_extent_rec(extent_cache, &tmpl);
7239
7240         ptr = (unsigned long)(ei + 1);
7241         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7242             key.type == BTRFS_EXTENT_ITEM_KEY)
7243                 ptr += sizeof(struct btrfs_tree_block_info);
7244
7245         end = (unsigned long)ei + item_size;
7246         while (ptr < end) {
7247                 iref = (struct btrfs_extent_inline_ref *)ptr;
7248                 type = btrfs_extent_inline_ref_type(eb, iref);
7249                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7250                 switch (type) {
7251                 case BTRFS_TREE_BLOCK_REF_KEY:
7252                         ret = add_tree_backref(extent_cache, key.objectid,
7253                                         0, offset, 0);
7254                         if (ret < 0)
7255                                 error(
7256                         "add_tree_backref failed (extent items tree block): %s",
7257                                       strerror(-ret));
7258                         break;
7259                 case BTRFS_SHARED_BLOCK_REF_KEY:
7260                         ret = add_tree_backref(extent_cache, key.objectid,
7261                                         offset, 0, 0);
7262                         if (ret < 0)
7263                                 error(
7264                         "add_tree_backref failed (extent items shared block): %s",
7265                                       strerror(-ret));
7266                         break;
7267                 case BTRFS_EXTENT_DATA_REF_KEY:
7268                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7269                         add_data_backref(extent_cache, key.objectid, 0,
7270                                         btrfs_extent_data_ref_root(eb, dref),
7271                                         btrfs_extent_data_ref_objectid(eb,
7272                                                                        dref),
7273                                         btrfs_extent_data_ref_offset(eb, dref),
7274                                         btrfs_extent_data_ref_count(eb, dref),
7275                                         0, num_bytes);
7276                         break;
7277                 case BTRFS_SHARED_DATA_REF_KEY:
7278                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7279                         add_data_backref(extent_cache, key.objectid, offset,
7280                                         0, 0, 0,
7281                                         btrfs_shared_data_ref_count(eb, sref),
7282                                         0, num_bytes);
7283                         break;
7284                 default:
7285                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7286                                 key.objectid, key.type, num_bytes);
7287                         goto out;
7288                 }
7289                 ptr += btrfs_extent_inline_ref_size(type);
7290         }
7291         WARN_ON(ptr > end);
7292 out:
7293         return 0;
7294 }
7295
7296 static int check_cache_range(struct btrfs_root *root,
7297                              struct btrfs_block_group_cache *cache,
7298                              u64 offset, u64 bytes)
7299 {
7300         struct btrfs_free_space *entry;
7301         u64 *logical;
7302         u64 bytenr;
7303         int stripe_len;
7304         int i, nr, ret;
7305
7306         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7307                 bytenr = btrfs_sb_offset(i);
7308                 ret = btrfs_rmap_block(root->fs_info,
7309                                        cache->key.objectid, bytenr, 0,
7310                                        &logical, &nr, &stripe_len);
7311                 if (ret)
7312                         return ret;
7313
7314                 while (nr--) {
7315                         if (logical[nr] + stripe_len <= offset)
7316                                 continue;
7317                         if (offset + bytes <= logical[nr])
7318                                 continue;
7319                         if (logical[nr] == offset) {
7320                                 if (stripe_len >= bytes) {
7321                                         free(logical);
7322                                         return 0;
7323                                 }
7324                                 bytes -= stripe_len;
7325                                 offset += stripe_len;
7326                         } else if (logical[nr] < offset) {
7327                                 if (logical[nr] + stripe_len >=
7328                                     offset + bytes) {
7329                                         free(logical);
7330                                         return 0;
7331                                 }
7332                                 bytes = (offset + bytes) -
7333                                         (logical[nr] + stripe_len);
7334                                 offset = logical[nr] + stripe_len;
7335                         } else {
7336                                 /*
7337                                  * Could be tricky, the super may land in the
7338                                  * middle of the area we're checking.  First
7339                                  * check the easiest case, it's at the end.
7340                                  */
7341                                 if (logical[nr] + stripe_len >=
7342                                     bytes + offset) {
7343                                         bytes = logical[nr] - offset;
7344                                         continue;
7345                                 }
7346
7347                                 /* Check the left side */
7348                                 ret = check_cache_range(root, cache,
7349                                                         offset,
7350                                                         logical[nr] - offset);
7351                                 if (ret) {
7352                                         free(logical);
7353                                         return ret;
7354                                 }
7355
7356                                 /* Now we continue with the right side */
7357                                 bytes = (offset + bytes) -
7358                                         (logical[nr] + stripe_len);
7359                                 offset = logical[nr] + stripe_len;
7360                         }
7361                 }
7362
7363                 free(logical);
7364         }
7365
7366         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7367         if (!entry) {
7368                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7369                         offset, offset+bytes);
7370                 return -EINVAL;
7371         }
7372
7373         if (entry->offset != offset) {
7374                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7375                         entry->offset);
7376                 return -EINVAL;
7377         }
7378
7379         if (entry->bytes != bytes) {
7380                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7381                         bytes, entry->bytes, offset);
7382                 return -EINVAL;
7383         }
7384
7385         unlink_free_space(cache->free_space_ctl, entry);
7386         free(entry);
7387         return 0;
7388 }
7389
7390 static int verify_space_cache(struct btrfs_root *root,
7391                               struct btrfs_block_group_cache *cache)
7392 {
7393         struct btrfs_path path;
7394         struct extent_buffer *leaf;
7395         struct btrfs_key key;
7396         u64 last;
7397         int ret = 0;
7398
7399         root = root->fs_info->extent_root;
7400
7401         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7402
7403         btrfs_init_path(&path);
7404         key.objectid = last;
7405         key.offset = 0;
7406         key.type = BTRFS_EXTENT_ITEM_KEY;
7407         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7408         if (ret < 0)
7409                 goto out;
7410         ret = 0;
7411         while (1) {
7412                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7413                         ret = btrfs_next_leaf(root, &path);
7414                         if (ret < 0)
7415                                 goto out;
7416                         if (ret > 0) {
7417                                 ret = 0;
7418                                 break;
7419                         }
7420                 }
7421                 leaf = path.nodes[0];
7422                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7423                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7424                         break;
7425                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7426                     key.type != BTRFS_METADATA_ITEM_KEY) {
7427                         path.slots[0]++;
7428                         continue;
7429                 }
7430
7431                 if (last == key.objectid) {
7432                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7433                                 last = key.objectid + key.offset;
7434                         else
7435                                 last = key.objectid + root->fs_info->nodesize;
7436                         path.slots[0]++;
7437                         continue;
7438                 }
7439
7440                 ret = check_cache_range(root, cache, last,
7441                                         key.objectid - last);
7442                 if (ret)
7443                         break;
7444                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7445                         last = key.objectid + key.offset;
7446                 else
7447                         last = key.objectid + root->fs_info->nodesize;
7448                 path.slots[0]++;
7449         }
7450
7451         if (last < cache->key.objectid + cache->key.offset)
7452                 ret = check_cache_range(root, cache, last,
7453                                         cache->key.objectid +
7454                                         cache->key.offset - last);
7455
7456 out:
7457         btrfs_release_path(&path);
7458
7459         if (!ret &&
7460             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7461                 fprintf(stderr, "There are still entries left in the space "
7462                         "cache\n");
7463                 ret = -EINVAL;
7464         }
7465
7466         return ret;
7467 }
7468
7469 static int check_space_cache(struct btrfs_root *root)
7470 {
7471         struct btrfs_block_group_cache *cache;
7472         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7473         int ret;
7474         int error = 0;
7475
7476         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7477             btrfs_super_generation(root->fs_info->super_copy) !=
7478             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7479                 printf("cache and super generation don't match, space cache "
7480                        "will be invalidated\n");
7481                 return 0;
7482         }
7483
7484         if (ctx.progress_enabled) {
7485                 ctx.tp = TASK_FREE_SPACE;
7486                 task_start(ctx.info);
7487         }
7488
7489         while (1) {
7490                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7491                 if (!cache)
7492                         break;
7493
7494                 start = cache->key.objectid + cache->key.offset;
7495                 if (!cache->free_space_ctl) {
7496                         if (btrfs_init_free_space_ctl(cache,
7497                                                 root->fs_info->sectorsize)) {
7498                                 ret = -ENOMEM;
7499                                 break;
7500                         }
7501                 } else {
7502                         btrfs_remove_free_space_cache(cache);
7503                 }
7504
7505                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7506                         ret = exclude_super_stripes(root, cache);
7507                         if (ret) {
7508                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7509                                         strerror(-ret));
7510                                 error++;
7511                                 continue;
7512                         }
7513                         ret = load_free_space_tree(root->fs_info, cache);
7514                         free_excluded_extents(root, cache);
7515                         if (ret < 0) {
7516                                 fprintf(stderr, "could not load free space tree: %s\n",
7517                                         strerror(-ret));
7518                                 error++;
7519                                 continue;
7520                         }
7521                         error += ret;
7522                 } else {
7523                         ret = load_free_space_cache(root->fs_info, cache);
7524                         if (!ret)
7525                                 continue;
7526                 }
7527
7528                 ret = verify_space_cache(root, cache);
7529                 if (ret) {
7530                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7531                                 cache->key.objectid);
7532                         error++;
7533                 }
7534         }
7535
7536         task_stop(ctx.info);
7537
7538         return error ? -EINVAL : 0;
7539 }
7540
7541 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7542                         u64 num_bytes, unsigned long leaf_offset,
7543                         struct extent_buffer *eb) {
7544
7545         struct btrfs_fs_info *fs_info = root->fs_info;
7546         u64 offset = 0;
7547         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7548         char *data;
7549         unsigned long csum_offset;
7550         u32 csum;
7551         u32 csum_expected;
7552         u64 read_len;
7553         u64 data_checked = 0;
7554         u64 tmp;
7555         int ret = 0;
7556         int mirror;
7557         int num_copies;
7558
7559         if (num_bytes % fs_info->sectorsize)
7560                 return -EINVAL;
7561
7562         data = malloc(num_bytes);
7563         if (!data)
7564                 return -ENOMEM;
7565
7566         while (offset < num_bytes) {
7567                 mirror = 0;
7568 again:
7569                 read_len = num_bytes - offset;
7570                 /* read as much space once a time */
7571                 ret = read_extent_data(fs_info, data + offset,
7572                                 bytenr + offset, &read_len, mirror);
7573                 if (ret)
7574                         goto out;
7575                 data_checked = 0;
7576                 /* verify every 4k data's checksum */
7577                 while (data_checked < read_len) {
7578                         csum = ~(u32)0;
7579                         tmp = offset + data_checked;
7580
7581                         csum = btrfs_csum_data((char *)data + tmp,
7582                                                csum, fs_info->sectorsize);
7583                         btrfs_csum_final(csum, (u8 *)&csum);
7584
7585                         csum_offset = leaf_offset +
7586                                  tmp / fs_info->sectorsize * csum_size;
7587                         read_extent_buffer(eb, (char *)&csum_expected,
7588                                            csum_offset, csum_size);
7589                         /* try another mirror */
7590                         if (csum != csum_expected) {
7591                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7592                                                 mirror, bytenr + tmp,
7593                                                 csum, csum_expected);
7594                                 num_copies = btrfs_num_copies(root->fs_info,
7595                                                 bytenr, num_bytes);
7596                                 if (mirror < num_copies - 1) {
7597                                         mirror += 1;
7598                                         goto again;
7599                                 }
7600                         }
7601                         data_checked += fs_info->sectorsize;
7602                 }
7603                 offset += read_len;
7604         }
7605 out:
7606         free(data);
7607         return ret;
7608 }
7609
7610 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7611                                u64 num_bytes)
7612 {
7613         struct btrfs_path path;
7614         struct extent_buffer *leaf;
7615         struct btrfs_key key;
7616         int ret;
7617
7618         btrfs_init_path(&path);
7619         key.objectid = bytenr;
7620         key.type = BTRFS_EXTENT_ITEM_KEY;
7621         key.offset = (u64)-1;
7622
7623 again:
7624         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7625                                 0, 0);
7626         if (ret < 0) {
7627                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7628                 btrfs_release_path(&path);
7629                 return ret;
7630         } else if (ret) {
7631                 if (path.slots[0] > 0) {
7632                         path.slots[0]--;
7633                 } else {
7634                         ret = btrfs_prev_leaf(root, &path);
7635                         if (ret < 0) {
7636                                 goto out;
7637                         } else if (ret > 0) {
7638                                 ret = 0;
7639                                 goto out;
7640                         }
7641                 }
7642         }
7643
7644         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7645
7646         /*
7647          * Block group items come before extent items if they have the same
7648          * bytenr, so walk back one more just in case.  Dear future traveller,
7649          * first congrats on mastering time travel.  Now if it's not too much
7650          * trouble could you go back to 2006 and tell Chris to make the
7651          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7652          * EXTENT_ITEM_KEY please?
7653          */
7654         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7655                 if (path.slots[0] > 0) {
7656                         path.slots[0]--;
7657                 } else {
7658                         ret = btrfs_prev_leaf(root, &path);
7659                         if (ret < 0) {
7660                                 goto out;
7661                         } else if (ret > 0) {
7662                                 ret = 0;
7663                                 goto out;
7664                         }
7665                 }
7666                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7667         }
7668
7669         while (num_bytes) {
7670                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7671                         ret = btrfs_next_leaf(root, &path);
7672                         if (ret < 0) {
7673                                 fprintf(stderr, "Error going to next leaf "
7674                                         "%d\n", ret);
7675                                 btrfs_release_path(&path);
7676                                 return ret;
7677                         } else if (ret) {
7678                                 break;
7679                         }
7680                 }
7681                 leaf = path.nodes[0];
7682                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7683                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7684                         path.slots[0]++;
7685                         continue;
7686                 }
7687                 if (key.objectid + key.offset < bytenr) {
7688                         path.slots[0]++;
7689                         continue;
7690                 }
7691                 if (key.objectid > bytenr + num_bytes)
7692                         break;
7693
7694                 if (key.objectid == bytenr) {
7695                         if (key.offset >= num_bytes) {
7696                                 num_bytes = 0;
7697                                 break;
7698                         }
7699                         num_bytes -= key.offset;
7700                         bytenr += key.offset;
7701                 } else if (key.objectid < bytenr) {
7702                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7703                                 num_bytes = 0;
7704                                 break;
7705                         }
7706                         num_bytes = (bytenr + num_bytes) -
7707                                 (key.objectid + key.offset);
7708                         bytenr = key.objectid + key.offset;
7709                 } else {
7710                         if (key.objectid + key.offset < bytenr + num_bytes) {
7711                                 u64 new_start = key.objectid + key.offset;
7712                                 u64 new_bytes = bytenr + num_bytes - new_start;
7713
7714                                 /*
7715                                  * Weird case, the extent is in the middle of
7716                                  * our range, we'll have to search one side
7717                                  * and then the other.  Not sure if this happens
7718                                  * in real life, but no harm in coding it up
7719                                  * anyway just in case.
7720                                  */
7721                                 btrfs_release_path(&path);
7722                                 ret = check_extent_exists(root, new_start,
7723                                                           new_bytes);
7724                                 if (ret) {
7725                                         fprintf(stderr, "Right section didn't "
7726                                                 "have a record\n");
7727                                         break;
7728                                 }
7729                                 num_bytes = key.objectid - bytenr;
7730                                 goto again;
7731                         }
7732                         num_bytes = key.objectid - bytenr;
7733                 }
7734                 path.slots[0]++;
7735         }
7736         ret = 0;
7737
7738 out:
7739         if (num_bytes && !ret) {
7740                 fprintf(stderr, "There are no extents for csum range "
7741                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7742                 ret = 1;
7743         }
7744
7745         btrfs_release_path(&path);
7746         return ret;
7747 }
7748
7749 static int check_csums(struct btrfs_root *root)
7750 {
7751         struct btrfs_path path;
7752         struct extent_buffer *leaf;
7753         struct btrfs_key key;
7754         u64 offset = 0, num_bytes = 0;
7755         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7756         int errors = 0;
7757         int ret;
7758         u64 data_len;
7759         unsigned long leaf_offset;
7760
7761         root = root->fs_info->csum_root;
7762         if (!extent_buffer_uptodate(root->node)) {
7763                 fprintf(stderr, "No valid csum tree found\n");
7764                 return -ENOENT;
7765         }
7766
7767         btrfs_init_path(&path);
7768         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7769         key.type = BTRFS_EXTENT_CSUM_KEY;
7770         key.offset = 0;
7771         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7772         if (ret < 0) {
7773                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7774                 btrfs_release_path(&path);
7775                 return ret;
7776         }
7777
7778         if (ret > 0 && path.slots[0])
7779                 path.slots[0]--;
7780         ret = 0;
7781
7782         while (1) {
7783                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7784                         ret = btrfs_next_leaf(root, &path);
7785                         if (ret < 0) {
7786                                 fprintf(stderr, "Error going to next leaf "
7787                                         "%d\n", ret);
7788                                 break;
7789                         }
7790                         if (ret)
7791                                 break;
7792                 }
7793                 leaf = path.nodes[0];
7794
7795                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7796                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7797                         path.slots[0]++;
7798                         continue;
7799                 }
7800
7801                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7802                               csum_size) * root->fs_info->sectorsize;
7803                 if (!check_data_csum)
7804                         goto skip_csum_check;
7805                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7806                 ret = check_extent_csums(root, key.offset, data_len,
7807                                          leaf_offset, leaf);
7808                 if (ret)
7809                         break;
7810 skip_csum_check:
7811                 if (!num_bytes) {
7812                         offset = key.offset;
7813                 } else if (key.offset != offset + num_bytes) {
7814                         ret = check_extent_exists(root, offset, num_bytes);
7815                         if (ret) {
7816                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7817                                         "there is no extent record\n",
7818                                         offset, offset+num_bytes);
7819                                 errors++;
7820                         }
7821                         offset = key.offset;
7822                         num_bytes = 0;
7823                 }
7824                 num_bytes += data_len;
7825                 path.slots[0]++;
7826         }
7827
7828         btrfs_release_path(&path);
7829         return errors;
7830 }
7831
7832 static int is_dropped_key(struct btrfs_key *key,
7833                           struct btrfs_key *drop_key) {
7834         if (key->objectid < drop_key->objectid)
7835                 return 1;
7836         else if (key->objectid == drop_key->objectid) {
7837                 if (key->type < drop_key->type)
7838                         return 1;
7839                 else if (key->type == drop_key->type) {
7840                         if (key->offset < drop_key->offset)
7841                                 return 1;
7842                 }
7843         }
7844         return 0;
7845 }
7846
7847 /*
7848  * Here are the rules for FULL_BACKREF.
7849  *
7850  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7851  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7852  *      FULL_BACKREF set.
7853  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7854  *    if it happened after the relocation occurred since we'll have dropped the
7855  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7856  *    have no real way to know for sure.
7857  *
7858  * We process the blocks one root at a time, and we start from the lowest root
7859  * objectid and go to the highest.  So we can just lookup the owner backref for
7860  * the record and if we don't find it then we know it doesn't exist and we have
7861  * a FULL BACKREF.
7862  *
7863  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7864  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7865  * be set or not and then we can check later once we've gathered all the refs.
7866  */
7867 static int calc_extent_flag(struct cache_tree *extent_cache,
7868                            struct extent_buffer *buf,
7869                            struct root_item_record *ri,
7870                            u64 *flags)
7871 {
7872         struct extent_record *rec;
7873         struct cache_extent *cache;
7874         struct tree_backref *tback;
7875         u64 owner = 0;
7876
7877         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7878         /* we have added this extent before */
7879         if (!cache)
7880                 return -ENOENT;
7881
7882         rec = container_of(cache, struct extent_record, cache);
7883
7884         /*
7885          * Except file/reloc tree, we can not have
7886          * FULL BACKREF MODE
7887          */
7888         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7889                 goto normal;
7890         /*
7891          * root node
7892          */
7893         if (buf->start == ri->bytenr)
7894                 goto normal;
7895
7896         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7897                 goto full_backref;
7898
7899         owner = btrfs_header_owner(buf);
7900         if (owner == ri->objectid)
7901                 goto normal;
7902
7903         tback = find_tree_backref(rec, 0, owner);
7904         if (!tback)
7905                 goto full_backref;
7906 normal:
7907         *flags = 0;
7908         if (rec->flag_block_full_backref != FLAG_UNSET &&
7909             rec->flag_block_full_backref != 0)
7910                 rec->bad_full_backref = 1;
7911         return 0;
7912 full_backref:
7913         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7914         if (rec->flag_block_full_backref != FLAG_UNSET &&
7915             rec->flag_block_full_backref != 1)
7916                 rec->bad_full_backref = 1;
7917         return 0;
7918 }
7919
7920 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7921 {
7922         fprintf(stderr, "Invalid key type(");
7923         print_key_type(stderr, 0, key_type);
7924         fprintf(stderr, ") found in root(");
7925         print_objectid(stderr, rootid, 0);
7926         fprintf(stderr, ")\n");
7927 }
7928
7929 /*
7930  * Check if the key is valid with its extent buffer.
7931  *
7932  * This is a early check in case invalid key exists in a extent buffer
7933  * This is not comprehensive yet, but should prevent wrong key/item passed
7934  * further
7935  */
7936 static int check_type_with_root(u64 rootid, u8 key_type)
7937 {
7938         switch (key_type) {
7939         /* Only valid in chunk tree */
7940         case BTRFS_DEV_ITEM_KEY:
7941         case BTRFS_CHUNK_ITEM_KEY:
7942                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7943                         goto err;
7944                 break;
7945         /* valid in csum and log tree */
7946         case BTRFS_CSUM_TREE_OBJECTID:
7947                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7948                       is_fstree(rootid)))
7949                         goto err;
7950                 break;
7951         case BTRFS_EXTENT_ITEM_KEY:
7952         case BTRFS_METADATA_ITEM_KEY:
7953         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7954                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7955                         goto err;
7956                 break;
7957         case BTRFS_ROOT_ITEM_KEY:
7958                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7959                         goto err;
7960                 break;
7961         case BTRFS_DEV_EXTENT_KEY:
7962                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7963                         goto err;
7964                 break;
7965         }
7966         return 0;
7967 err:
7968         report_mismatch_key_root(key_type, rootid);
7969         return -EINVAL;
7970 }
7971
7972 static int run_next_block(struct btrfs_root *root,
7973                           struct block_info *bits,
7974                           int bits_nr,
7975                           u64 *last,
7976                           struct cache_tree *pending,
7977                           struct cache_tree *seen,
7978                           struct cache_tree *reada,
7979                           struct cache_tree *nodes,
7980                           struct cache_tree *extent_cache,
7981                           struct cache_tree *chunk_cache,
7982                           struct rb_root *dev_cache,
7983                           struct block_group_tree *block_group_cache,
7984                           struct device_extent_tree *dev_extent_cache,
7985                           struct root_item_record *ri)
7986 {
7987         struct btrfs_fs_info *fs_info = root->fs_info;
7988         struct extent_buffer *buf;
7989         struct extent_record *rec = NULL;
7990         u64 bytenr;
7991         u32 size;
7992         u64 parent;
7993         u64 owner;
7994         u64 flags;
7995         u64 ptr;
7996         u64 gen = 0;
7997         int ret = 0;
7998         int i;
7999         int nritems;
8000         struct btrfs_key key;
8001         struct cache_extent *cache;
8002         int reada_bits;
8003
8004         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8005                                     bits_nr, &reada_bits);
8006         if (nritems == 0)
8007                 return 1;
8008
8009         if (!reada_bits) {
8010                 for(i = 0; i < nritems; i++) {
8011                         ret = add_cache_extent(reada, bits[i].start,
8012                                                bits[i].size);
8013                         if (ret == -EEXIST)
8014                                 continue;
8015
8016                         /* fixme, get the parent transid */
8017                         readahead_tree_block(fs_info, bits[i].start, 0);
8018                 }
8019         }
8020         *last = bits[0].start;
8021         bytenr = bits[0].start;
8022         size = bits[0].size;
8023
8024         cache = lookup_cache_extent(pending, bytenr, size);
8025         if (cache) {
8026                 remove_cache_extent(pending, cache);
8027                 free(cache);
8028         }
8029         cache = lookup_cache_extent(reada, bytenr, size);
8030         if (cache) {
8031                 remove_cache_extent(reada, cache);
8032                 free(cache);
8033         }
8034         cache = lookup_cache_extent(nodes, bytenr, size);
8035         if (cache) {
8036                 remove_cache_extent(nodes, cache);
8037                 free(cache);
8038         }
8039         cache = lookup_cache_extent(extent_cache, bytenr, size);
8040         if (cache) {
8041                 rec = container_of(cache, struct extent_record, cache);
8042                 gen = rec->parent_generation;
8043         }
8044
8045         /* fixme, get the real parent transid */
8046         buf = read_tree_block(root->fs_info, bytenr, gen);
8047         if (!extent_buffer_uptodate(buf)) {
8048                 record_bad_block_io(root->fs_info,
8049                                     extent_cache, bytenr, size);
8050                 goto out;
8051         }
8052
8053         nritems = btrfs_header_nritems(buf);
8054
8055         flags = 0;
8056         if (!init_extent_tree) {
8057                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8058                                        btrfs_header_level(buf), 1, NULL,
8059                                        &flags);
8060                 if (ret < 0) {
8061                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8062                         if (ret < 0) {
8063                                 fprintf(stderr, "Couldn't calc extent flags\n");
8064                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8065                         }
8066                 }
8067         } else {
8068                 flags = 0;
8069                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8070                 if (ret < 0) {
8071                         fprintf(stderr, "Couldn't calc extent flags\n");
8072                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8073                 }
8074         }
8075
8076         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8077                 if (ri != NULL &&
8078                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8079                     ri->objectid == btrfs_header_owner(buf)) {
8080                         /*
8081                          * Ok we got to this block from it's original owner and
8082                          * we have FULL_BACKREF set.  Relocation can leave
8083                          * converted blocks over so this is altogether possible,
8084                          * however it's not possible if the generation > the
8085                          * last snapshot, so check for this case.
8086                          */
8087                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8088                             btrfs_header_generation(buf) > ri->last_snapshot) {
8089                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8090                                 rec->bad_full_backref = 1;
8091                         }
8092                 }
8093         } else {
8094                 if (ri != NULL &&
8095                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8096                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8097                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8098                         rec->bad_full_backref = 1;
8099                 }
8100         }
8101
8102         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8103                 rec->flag_block_full_backref = 1;
8104                 parent = bytenr;
8105                 owner = 0;
8106         } else {
8107                 rec->flag_block_full_backref = 0;
8108                 parent = 0;
8109                 owner = btrfs_header_owner(buf);
8110         }
8111
8112         ret = check_block(root, extent_cache, buf, flags);
8113         if (ret)
8114                 goto out;
8115
8116         if (btrfs_is_leaf(buf)) {
8117                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8118                 for (i = 0; i < nritems; i++) {
8119                         struct btrfs_file_extent_item *fi;
8120                         btrfs_item_key_to_cpu(buf, &key, i);
8121                         /*
8122                          * Check key type against the leaf owner.
8123                          * Could filter quite a lot of early error if
8124                          * owner is correct
8125                          */
8126                         if (check_type_with_root(btrfs_header_owner(buf),
8127                                                  key.type)) {
8128                                 fprintf(stderr, "ignoring invalid key\n");
8129                                 continue;
8130                         }
8131                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8132                                 process_extent_item(root, extent_cache, buf,
8133                                                     i);
8134                                 continue;
8135                         }
8136                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8137                                 process_extent_item(root, extent_cache, buf,
8138                                                     i);
8139                                 continue;
8140                         }
8141                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8142                                 total_csum_bytes +=
8143                                         btrfs_item_size_nr(buf, i);
8144                                 continue;
8145                         }
8146                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8147                                 process_chunk_item(chunk_cache, &key, buf, i);
8148                                 continue;
8149                         }
8150                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8151                                 process_device_item(dev_cache, &key, buf, i);
8152                                 continue;
8153                         }
8154                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8155                                 process_block_group_item(block_group_cache,
8156                                         &key, buf, i);
8157                                 continue;
8158                         }
8159                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8160                                 process_device_extent_item(dev_extent_cache,
8161                                         &key, buf, i);
8162                                 continue;
8163
8164                         }
8165                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8166 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8167                                 process_extent_ref_v0(extent_cache, buf, i);
8168 #else
8169                                 BUG();
8170 #endif
8171                                 continue;
8172                         }
8173
8174                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8175                                 ret = add_tree_backref(extent_cache,
8176                                                 key.objectid, 0, key.offset, 0);
8177                                 if (ret < 0)
8178                                         error(
8179                                 "add_tree_backref failed (leaf tree block): %s",
8180                                               strerror(-ret));
8181                                 continue;
8182                         }
8183                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8184                                 ret = add_tree_backref(extent_cache,
8185                                                 key.objectid, key.offset, 0, 0);
8186                                 if (ret < 0)
8187                                         error(
8188                                 "add_tree_backref failed (leaf shared block): %s",
8189                                               strerror(-ret));
8190                                 continue;
8191                         }
8192                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8193                                 struct btrfs_extent_data_ref *ref;
8194                                 ref = btrfs_item_ptr(buf, i,
8195                                                 struct btrfs_extent_data_ref);
8196                                 add_data_backref(extent_cache,
8197                                         key.objectid, 0,
8198                                         btrfs_extent_data_ref_root(buf, ref),
8199                                         btrfs_extent_data_ref_objectid(buf,
8200                                                                        ref),
8201                                         btrfs_extent_data_ref_offset(buf, ref),
8202                                         btrfs_extent_data_ref_count(buf, ref),
8203                                         0, root->fs_info->sectorsize);
8204                                 continue;
8205                         }
8206                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8207                                 struct btrfs_shared_data_ref *ref;
8208                                 ref = btrfs_item_ptr(buf, i,
8209                                                 struct btrfs_shared_data_ref);
8210                                 add_data_backref(extent_cache,
8211                                         key.objectid, key.offset, 0, 0, 0,
8212                                         btrfs_shared_data_ref_count(buf, ref),
8213                                         0, root->fs_info->sectorsize);
8214                                 continue;
8215                         }
8216                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8217                                 struct bad_item *bad;
8218
8219                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8220                                         continue;
8221                                 if (!owner)
8222                                         continue;
8223                                 bad = malloc(sizeof(struct bad_item));
8224                                 if (!bad)
8225                                         continue;
8226                                 INIT_LIST_HEAD(&bad->list);
8227                                 memcpy(&bad->key, &key,
8228                                        sizeof(struct btrfs_key));
8229                                 bad->root_id = owner;
8230                                 list_add_tail(&bad->list, &delete_items);
8231                                 continue;
8232                         }
8233                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8234                                 continue;
8235                         fi = btrfs_item_ptr(buf, i,
8236                                             struct btrfs_file_extent_item);
8237                         if (btrfs_file_extent_type(buf, fi) ==
8238                             BTRFS_FILE_EXTENT_INLINE)
8239                                 continue;
8240                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8241                                 continue;
8242
8243                         data_bytes_allocated +=
8244                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8245                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8246                                 abort();
8247                         }
8248                         data_bytes_referenced +=
8249                                 btrfs_file_extent_num_bytes(buf, fi);
8250                         add_data_backref(extent_cache,
8251                                 btrfs_file_extent_disk_bytenr(buf, fi),
8252                                 parent, owner, key.objectid, key.offset -
8253                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8254                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8255                 }
8256         } else {
8257                 int level;
8258                 struct btrfs_key first_key;
8259
8260                 first_key.objectid = 0;
8261
8262                 if (nritems > 0)
8263                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8264                 level = btrfs_header_level(buf);
8265                 for (i = 0; i < nritems; i++) {
8266                         struct extent_record tmpl;
8267
8268                         ptr = btrfs_node_blockptr(buf, i);
8269                         size = root->fs_info->nodesize;
8270                         btrfs_node_key_to_cpu(buf, &key, i);
8271                         if (ri != NULL) {
8272                                 if ((level == ri->drop_level)
8273                                     && is_dropped_key(&key, &ri->drop_key)) {
8274                                         continue;
8275                                 }
8276                         }
8277
8278                         memset(&tmpl, 0, sizeof(tmpl));
8279                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8280                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8281                         tmpl.start = ptr;
8282                         tmpl.nr = size;
8283                         tmpl.refs = 1;
8284                         tmpl.metadata = 1;
8285                         tmpl.max_size = size;
8286                         ret = add_extent_rec(extent_cache, &tmpl);
8287                         if (ret < 0)
8288                                 goto out;
8289
8290                         ret = add_tree_backref(extent_cache, ptr, parent,
8291                                         owner, 1);
8292                         if (ret < 0) {
8293                                 error(
8294                                 "add_tree_backref failed (non-leaf block): %s",
8295                                       strerror(-ret));
8296                                 continue;
8297                         }
8298
8299                         if (level > 1) {
8300                                 add_pending(nodes, seen, ptr, size);
8301                         } else {
8302                                 add_pending(pending, seen, ptr, size);
8303                         }
8304                 }
8305                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8306                                       nritems) * sizeof(struct btrfs_key_ptr);
8307         }
8308         total_btree_bytes += buf->len;
8309         if (fs_root_objectid(btrfs_header_owner(buf)))
8310                 total_fs_tree_bytes += buf->len;
8311         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8312                 total_extent_tree_bytes += buf->len;
8313 out:
8314         free_extent_buffer(buf);
8315         return ret;
8316 }
8317
8318 static int add_root_to_pending(struct extent_buffer *buf,
8319                                struct cache_tree *extent_cache,
8320                                struct cache_tree *pending,
8321                                struct cache_tree *seen,
8322                                struct cache_tree *nodes,
8323                                u64 objectid)
8324 {
8325         struct extent_record tmpl;
8326         int ret;
8327
8328         if (btrfs_header_level(buf) > 0)
8329                 add_pending(nodes, seen, buf->start, buf->len);
8330         else
8331                 add_pending(pending, seen, buf->start, buf->len);
8332
8333         memset(&tmpl, 0, sizeof(tmpl));
8334         tmpl.start = buf->start;
8335         tmpl.nr = buf->len;
8336         tmpl.is_root = 1;
8337         tmpl.refs = 1;
8338         tmpl.metadata = 1;
8339         tmpl.max_size = buf->len;
8340         add_extent_rec(extent_cache, &tmpl);
8341
8342         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8343             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8344                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8345                                 0, 1);
8346         else
8347                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8348                                 1);
8349         return ret;
8350 }
8351
8352 /* as we fix the tree, we might be deleting blocks that
8353  * we're tracking for repair.  This hook makes sure we
8354  * remove any backrefs for blocks as we are fixing them.
8355  */
8356 static int free_extent_hook(struct btrfs_trans_handle *trans,
8357                             struct btrfs_root *root,
8358                             u64 bytenr, u64 num_bytes, u64 parent,
8359                             u64 root_objectid, u64 owner, u64 offset,
8360                             int refs_to_drop)
8361 {
8362         struct extent_record *rec;
8363         struct cache_extent *cache;
8364         int is_data;
8365         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8366
8367         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8368         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8369         if (!cache)
8370                 return 0;
8371
8372         rec = container_of(cache, struct extent_record, cache);
8373         if (is_data) {
8374                 struct data_backref *back;
8375                 back = find_data_backref(rec, parent, root_objectid, owner,
8376                                          offset, 1, bytenr, num_bytes);
8377                 if (!back)
8378                         goto out;
8379                 if (back->node.found_ref) {
8380                         back->found_ref -= refs_to_drop;
8381                         if (rec->refs)
8382                                 rec->refs -= refs_to_drop;
8383                 }
8384                 if (back->node.found_extent_tree) {
8385                         back->num_refs -= refs_to_drop;
8386                         if (rec->extent_item_refs)
8387                                 rec->extent_item_refs -= refs_to_drop;
8388                 }
8389                 if (back->found_ref == 0)
8390                         back->node.found_ref = 0;
8391                 if (back->num_refs == 0)
8392                         back->node.found_extent_tree = 0;
8393
8394                 if (!back->node.found_extent_tree && back->node.found_ref) {
8395                         rb_erase(&back->node.node, &rec->backref_tree);
8396                         free(back);
8397                 }
8398         } else {
8399                 struct tree_backref *back;
8400                 back = find_tree_backref(rec, parent, root_objectid);
8401                 if (!back)
8402                         goto out;
8403                 if (back->node.found_ref) {
8404                         if (rec->refs)
8405                                 rec->refs--;
8406                         back->node.found_ref = 0;
8407                 }
8408                 if (back->node.found_extent_tree) {
8409                         if (rec->extent_item_refs)
8410                                 rec->extent_item_refs--;
8411                         back->node.found_extent_tree = 0;
8412                 }
8413                 if (!back->node.found_extent_tree && back->node.found_ref) {
8414                         rb_erase(&back->node.node, &rec->backref_tree);
8415                         free(back);
8416                 }
8417         }
8418         maybe_free_extent_rec(extent_cache, rec);
8419 out:
8420         return 0;
8421 }
8422
8423 static int delete_extent_records(struct btrfs_trans_handle *trans,
8424                                  struct btrfs_root *root,
8425                                  struct btrfs_path *path,
8426                                  u64 bytenr)
8427 {
8428         struct btrfs_key key;
8429         struct btrfs_key found_key;
8430         struct extent_buffer *leaf;
8431         int ret;
8432         int slot;
8433
8434
8435         key.objectid = bytenr;
8436         key.type = (u8)-1;
8437         key.offset = (u64)-1;
8438
8439         while(1) {
8440                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8441                                         &key, path, 0, 1);
8442                 if (ret < 0)
8443                         break;
8444
8445                 if (ret > 0) {
8446                         ret = 0;
8447                         if (path->slots[0] == 0)
8448                                 break;
8449                         path->slots[0]--;
8450                 }
8451                 ret = 0;
8452
8453                 leaf = path->nodes[0];
8454                 slot = path->slots[0];
8455
8456                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8457                 if (found_key.objectid != bytenr)
8458                         break;
8459
8460                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8461                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8462                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8463                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8464                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8465                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8466                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8467                         btrfs_release_path(path);
8468                         if (found_key.type == 0) {
8469                                 if (found_key.offset == 0)
8470                                         break;
8471                                 key.offset = found_key.offset - 1;
8472                                 key.type = found_key.type;
8473                         }
8474                         key.type = found_key.type - 1;
8475                         key.offset = (u64)-1;
8476                         continue;
8477                 }
8478
8479                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8480                         found_key.objectid, found_key.type, found_key.offset);
8481
8482                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8483                 if (ret)
8484                         break;
8485                 btrfs_release_path(path);
8486
8487                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8488                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8489                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8490                                 found_key.offset : root->fs_info->nodesize;
8491
8492                         ret = btrfs_update_block_group(trans, root, bytenr,
8493                                                        bytes, 0, 0);
8494                         if (ret)
8495                                 break;
8496                 }
8497         }
8498
8499         btrfs_release_path(path);
8500         return ret;
8501 }
8502
8503 /*
8504  * for a single backref, this will allocate a new extent
8505  * and add the backref to it.
8506  */
8507 static int record_extent(struct btrfs_trans_handle *trans,
8508                          struct btrfs_fs_info *info,
8509                          struct btrfs_path *path,
8510                          struct extent_record *rec,
8511                          struct extent_backref *back,
8512                          int allocated, u64 flags)
8513 {
8514         int ret = 0;
8515         struct btrfs_root *extent_root = info->extent_root;
8516         struct extent_buffer *leaf;
8517         struct btrfs_key ins_key;
8518         struct btrfs_extent_item *ei;
8519         struct data_backref *dback;
8520         struct btrfs_tree_block_info *bi;
8521
8522         if (!back->is_data)
8523                 rec->max_size = max_t(u64, rec->max_size,
8524                                     info->nodesize);
8525
8526         if (!allocated) {
8527                 u32 item_size = sizeof(*ei);
8528
8529                 if (!back->is_data)
8530                         item_size += sizeof(*bi);
8531
8532                 ins_key.objectid = rec->start;
8533                 ins_key.offset = rec->max_size;
8534                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8535
8536                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8537                                         &ins_key, item_size);
8538                 if (ret)
8539                         goto fail;
8540
8541                 leaf = path->nodes[0];
8542                 ei = btrfs_item_ptr(leaf, path->slots[0],
8543                                     struct btrfs_extent_item);
8544
8545                 btrfs_set_extent_refs(leaf, ei, 0);
8546                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8547
8548                 if (back->is_data) {
8549                         btrfs_set_extent_flags(leaf, ei,
8550                                                BTRFS_EXTENT_FLAG_DATA);
8551                 } else {
8552                         struct btrfs_disk_key copy_key;;
8553
8554                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8555                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8556                                              sizeof(*bi));
8557
8558                         btrfs_set_disk_key_objectid(&copy_key,
8559                                                     rec->info_objectid);
8560                         btrfs_set_disk_key_type(&copy_key, 0);
8561                         btrfs_set_disk_key_offset(&copy_key, 0);
8562
8563                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8564                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8565
8566                         btrfs_set_extent_flags(leaf, ei,
8567                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8568                 }
8569
8570                 btrfs_mark_buffer_dirty(leaf);
8571                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8572                                                rec->max_size, 1, 0);
8573                 if (ret)
8574                         goto fail;
8575                 btrfs_release_path(path);
8576         }
8577
8578         if (back->is_data) {
8579                 u64 parent;
8580                 int i;
8581
8582                 dback = to_data_backref(back);
8583                 if (back->full_backref)
8584                         parent = dback->parent;
8585                 else
8586                         parent = 0;
8587
8588                 for (i = 0; i < dback->found_ref; i++) {
8589                         /* if parent != 0, we're doing a full backref
8590                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8591                          * just makes the backref allocator create a data
8592                          * backref
8593                          */
8594                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8595                                                    rec->start, rec->max_size,
8596                                                    parent,
8597                                                    dback->root,
8598                                                    parent ?
8599                                                    BTRFS_FIRST_FREE_OBJECTID :
8600                                                    dback->owner,
8601                                                    dback->offset);
8602                         if (ret)
8603                                 break;
8604                 }
8605                 fprintf(stderr, "adding new data backref"
8606                                 " on %llu %s %llu owner %llu"
8607                                 " offset %llu found %d\n",
8608                                 (unsigned long long)rec->start,
8609                                 back->full_backref ?
8610                                 "parent" : "root",
8611                                 back->full_backref ?
8612                                 (unsigned long long)parent :
8613                                 (unsigned long long)dback->root,
8614                                 (unsigned long long)dback->owner,
8615                                 (unsigned long long)dback->offset,
8616                                 dback->found_ref);
8617         } else {
8618                 u64 parent;
8619                 struct tree_backref *tback;
8620
8621                 tback = to_tree_backref(back);
8622                 if (back->full_backref)
8623                         parent = tback->parent;
8624                 else
8625                         parent = 0;
8626
8627                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8628                                            rec->start, rec->max_size,
8629                                            parent, tback->root, 0, 0);
8630                 fprintf(stderr, "adding new tree backref on "
8631                         "start %llu len %llu parent %llu root %llu\n",
8632                         rec->start, rec->max_size, parent, tback->root);
8633         }
8634 fail:
8635         btrfs_release_path(path);
8636         return ret;
8637 }
8638
8639 static struct extent_entry *find_entry(struct list_head *entries,
8640                                        u64 bytenr, u64 bytes)
8641 {
8642         struct extent_entry *entry = NULL;
8643
8644         list_for_each_entry(entry, entries, list) {
8645                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8646                         return entry;
8647         }
8648
8649         return NULL;
8650 }
8651
8652 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8653 {
8654         struct extent_entry *entry, *best = NULL, *prev = NULL;
8655
8656         list_for_each_entry(entry, entries, list) {
8657                 /*
8658                  * If there are as many broken entries as entries then we know
8659                  * not to trust this particular entry.
8660                  */
8661                 if (entry->broken == entry->count)
8662                         continue;
8663
8664                 /*
8665                  * Special case, when there are only two entries and 'best' is
8666                  * the first one
8667                  */
8668                 if (!prev) {
8669                         best = entry;
8670                         prev = entry;
8671                         continue;
8672                 }
8673
8674                 /*
8675                  * If our current entry == best then we can't be sure our best
8676                  * is really the best, so we need to keep searching.
8677                  */
8678                 if (best && best->count == entry->count) {
8679                         prev = entry;
8680                         best = NULL;
8681                         continue;
8682                 }
8683
8684                 /* Prev == entry, not good enough, have to keep searching */
8685                 if (!prev->broken && prev->count == entry->count)
8686                         continue;
8687
8688                 if (!best)
8689                         best = (prev->count > entry->count) ? prev : entry;
8690                 else if (best->count < entry->count)
8691                         best = entry;
8692                 prev = entry;
8693         }
8694
8695         return best;
8696 }
8697
8698 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8699                       struct data_backref *dback, struct extent_entry *entry)
8700 {
8701         struct btrfs_trans_handle *trans;
8702         struct btrfs_root *root;
8703         struct btrfs_file_extent_item *fi;
8704         struct extent_buffer *leaf;
8705         struct btrfs_key key;
8706         u64 bytenr, bytes;
8707         int ret, err;
8708
8709         key.objectid = dback->root;
8710         key.type = BTRFS_ROOT_ITEM_KEY;
8711         key.offset = (u64)-1;
8712         root = btrfs_read_fs_root(info, &key);
8713         if (IS_ERR(root)) {
8714                 fprintf(stderr, "Couldn't find root for our ref\n");
8715                 return -EINVAL;
8716         }
8717
8718         /*
8719          * The backref points to the original offset of the extent if it was
8720          * split, so we need to search down to the offset we have and then walk
8721          * forward until we find the backref we're looking for.
8722          */
8723         key.objectid = dback->owner;
8724         key.type = BTRFS_EXTENT_DATA_KEY;
8725         key.offset = dback->offset;
8726         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8727         if (ret < 0) {
8728                 fprintf(stderr, "Error looking up ref %d\n", ret);
8729                 return ret;
8730         }
8731
8732         while (1) {
8733                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8734                         ret = btrfs_next_leaf(root, path);
8735                         if (ret) {
8736                                 fprintf(stderr, "Couldn't find our ref, next\n");
8737                                 return -EINVAL;
8738                         }
8739                 }
8740                 leaf = path->nodes[0];
8741                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8742                 if (key.objectid != dback->owner ||
8743                     key.type != BTRFS_EXTENT_DATA_KEY) {
8744                         fprintf(stderr, "Couldn't find our ref, search\n");
8745                         return -EINVAL;
8746                 }
8747                 fi = btrfs_item_ptr(leaf, path->slots[0],
8748                                     struct btrfs_file_extent_item);
8749                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8750                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8751
8752                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8753                         break;
8754                 path->slots[0]++;
8755         }
8756
8757         btrfs_release_path(path);
8758
8759         trans = btrfs_start_transaction(root, 1);
8760         if (IS_ERR(trans))
8761                 return PTR_ERR(trans);
8762
8763         /*
8764          * Ok we have the key of the file extent we want to fix, now we can cow
8765          * down to the thing and fix it.
8766          */
8767         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8768         if (ret < 0) {
8769                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8770                         key.objectid, key.type, key.offset, ret);
8771                 goto out;
8772         }
8773         if (ret > 0) {
8774                 fprintf(stderr, "Well that's odd, we just found this key "
8775                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8776                         key.offset);
8777                 ret = -EINVAL;
8778                 goto out;
8779         }
8780         leaf = path->nodes[0];
8781         fi = btrfs_item_ptr(leaf, path->slots[0],
8782                             struct btrfs_file_extent_item);
8783
8784         if (btrfs_file_extent_compression(leaf, fi) &&
8785             dback->disk_bytenr != entry->bytenr) {
8786                 fprintf(stderr, "Ref doesn't match the record start and is "
8787                         "compressed, please take a btrfs-image of this file "
8788                         "system and send it to a btrfs developer so they can "
8789                         "complete this functionality for bytenr %Lu\n",
8790                         dback->disk_bytenr);
8791                 ret = -EINVAL;
8792                 goto out;
8793         }
8794
8795         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8796                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8797         } else if (dback->disk_bytenr > entry->bytenr) {
8798                 u64 off_diff, offset;
8799
8800                 off_diff = dback->disk_bytenr - entry->bytenr;
8801                 offset = btrfs_file_extent_offset(leaf, fi);
8802                 if (dback->disk_bytenr + offset +
8803                     btrfs_file_extent_num_bytes(leaf, fi) >
8804                     entry->bytenr + entry->bytes) {
8805                         fprintf(stderr, "Ref is past the entry end, please "
8806                                 "take a btrfs-image of this file system and "
8807                                 "send it to a btrfs developer, ref %Lu\n",
8808                                 dback->disk_bytenr);
8809                         ret = -EINVAL;
8810                         goto out;
8811                 }
8812                 offset += off_diff;
8813                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8814                 btrfs_set_file_extent_offset(leaf, fi, offset);
8815         } else if (dback->disk_bytenr < entry->bytenr) {
8816                 u64 offset;
8817
8818                 offset = btrfs_file_extent_offset(leaf, fi);
8819                 if (dback->disk_bytenr + offset < entry->bytenr) {
8820                         fprintf(stderr, "Ref is before the entry start, please"
8821                                 " take a btrfs-image of this file system and "
8822                                 "send it to a btrfs developer, ref %Lu\n",
8823                                 dback->disk_bytenr);
8824                         ret = -EINVAL;
8825                         goto out;
8826                 }
8827
8828                 offset += dback->disk_bytenr;
8829                 offset -= entry->bytenr;
8830                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8831                 btrfs_set_file_extent_offset(leaf, fi, offset);
8832         }
8833
8834         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8835
8836         /*
8837          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8838          * only do this if we aren't using compression, otherwise it's a
8839          * trickier case.
8840          */
8841         if (!btrfs_file_extent_compression(leaf, fi))
8842                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8843         else
8844                 printf("ram bytes may be wrong?\n");
8845         btrfs_mark_buffer_dirty(leaf);
8846 out:
8847         err = btrfs_commit_transaction(trans, root);
8848         btrfs_release_path(path);
8849         return ret ? ret : err;
8850 }
8851
8852 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8853                            struct extent_record *rec)
8854 {
8855         struct extent_backref *back, *tmp;
8856         struct data_backref *dback;
8857         struct extent_entry *entry, *best = NULL;
8858         LIST_HEAD(entries);
8859         int nr_entries = 0;
8860         int broken_entries = 0;
8861         int ret = 0;
8862         short mismatch = 0;
8863
8864         /*
8865          * Metadata is easy and the backrefs should always agree on bytenr and
8866          * size, if not we've got bigger issues.
8867          */
8868         if (rec->metadata)
8869                 return 0;
8870
8871         rbtree_postorder_for_each_entry_safe(back, tmp,
8872                                              &rec->backref_tree, node) {
8873                 if (back->full_backref || !back->is_data)
8874                         continue;
8875
8876                 dback = to_data_backref(back);
8877
8878                 /*
8879                  * We only pay attention to backrefs that we found a real
8880                  * backref for.
8881                  */
8882                 if (dback->found_ref == 0)
8883                         continue;
8884
8885                 /*
8886                  * For now we only catch when the bytes don't match, not the
8887                  * bytenr.  We can easily do this at the same time, but I want
8888                  * to have a fs image to test on before we just add repair
8889                  * functionality willy-nilly so we know we won't screw up the
8890                  * repair.
8891                  */
8892
8893                 entry = find_entry(&entries, dback->disk_bytenr,
8894                                    dback->bytes);
8895                 if (!entry) {
8896                         entry = malloc(sizeof(struct extent_entry));
8897                         if (!entry) {
8898                                 ret = -ENOMEM;
8899                                 goto out;
8900                         }
8901                         memset(entry, 0, sizeof(*entry));
8902                         entry->bytenr = dback->disk_bytenr;
8903                         entry->bytes = dback->bytes;
8904                         list_add_tail(&entry->list, &entries);
8905                         nr_entries++;
8906                 }
8907
8908                 /*
8909                  * If we only have on entry we may think the entries agree when
8910                  * in reality they don't so we have to do some extra checking.
8911                  */
8912                 if (dback->disk_bytenr != rec->start ||
8913                     dback->bytes != rec->nr || back->broken)
8914                         mismatch = 1;
8915
8916                 if (back->broken) {
8917                         entry->broken++;
8918                         broken_entries++;
8919                 }
8920
8921                 entry->count++;
8922         }
8923
8924         /* Yay all the backrefs agree, carry on good sir */
8925         if (nr_entries <= 1 && !mismatch)
8926                 goto out;
8927
8928         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8929                 "%Lu\n", rec->start);
8930
8931         /*
8932          * First we want to see if the backrefs can agree amongst themselves who
8933          * is right, so figure out which one of the entries has the highest
8934          * count.
8935          */
8936         best = find_most_right_entry(&entries);
8937
8938         /*
8939          * Ok so we may have an even split between what the backrefs think, so
8940          * this is where we use the extent ref to see what it thinks.
8941          */
8942         if (!best) {
8943                 entry = find_entry(&entries, rec->start, rec->nr);
8944                 if (!entry && (!broken_entries || !rec->found_rec)) {
8945                         fprintf(stderr, "Backrefs don't agree with each other "
8946                                 "and extent record doesn't agree with anybody,"
8947                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8948                                 rec->start, rec->nr);
8949                         ret = -EINVAL;
8950                         goto out;
8951                 } else if (!entry) {
8952                         /*
8953                          * Ok our backrefs were broken, we'll assume this is the
8954                          * correct value and add an entry for this range.
8955                          */
8956                         entry = malloc(sizeof(struct extent_entry));
8957                         if (!entry) {
8958                                 ret = -ENOMEM;
8959                                 goto out;
8960                         }
8961                         memset(entry, 0, sizeof(*entry));
8962                         entry->bytenr = rec->start;
8963                         entry->bytes = rec->nr;
8964                         list_add_tail(&entry->list, &entries);
8965                         nr_entries++;
8966                 }
8967                 entry->count++;
8968                 best = find_most_right_entry(&entries);
8969                 if (!best) {
8970                         fprintf(stderr, "Backrefs and extent record evenly "
8971                                 "split on who is right, this is going to "
8972                                 "require user input to fix bytenr %Lu bytes "
8973                                 "%Lu\n", rec->start, rec->nr);
8974                         ret = -EINVAL;
8975                         goto out;
8976                 }
8977         }
8978
8979         /*
8980          * I don't think this can happen currently as we'll abort() if we catch
8981          * this case higher up, but in case somebody removes that we still can't
8982          * deal with it properly here yet, so just bail out of that's the case.
8983          */
8984         if (best->bytenr != rec->start) {
8985                 fprintf(stderr, "Extent start and backref starts don't match, "
8986                         "please use btrfs-image on this file system and send "
8987                         "it to a btrfs developer so they can make fsck fix "
8988                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8989                         rec->start, rec->nr);
8990                 ret = -EINVAL;
8991                 goto out;
8992         }
8993
8994         /*
8995          * Ok great we all agreed on an extent record, let's go find the real
8996          * references and fix up the ones that don't match.
8997          */
8998         rbtree_postorder_for_each_entry_safe(back, tmp,
8999                                              &rec->backref_tree, node) {
9000                 if (back->full_backref || !back->is_data)
9001                         continue;
9002
9003                 dback = to_data_backref(back);
9004
9005                 /*
9006                  * Still ignoring backrefs that don't have a real ref attached
9007                  * to them.
9008                  */
9009                 if (dback->found_ref == 0)
9010                         continue;
9011
9012                 if (dback->bytes == best->bytes &&
9013                     dback->disk_bytenr == best->bytenr)
9014                         continue;
9015
9016                 ret = repair_ref(info, path, dback, best);
9017                 if (ret)
9018                         goto out;
9019         }
9020
9021         /*
9022          * Ok we messed with the actual refs, which means we need to drop our
9023          * entire cache and go back and rescan.  I know this is a huge pain and
9024          * adds a lot of extra work, but it's the only way to be safe.  Once all
9025          * the backrefs agree we may not need to do anything to the extent
9026          * record itself.
9027          */
9028         ret = -EAGAIN;
9029 out:
9030         while (!list_empty(&entries)) {
9031                 entry = list_entry(entries.next, struct extent_entry, list);
9032                 list_del_init(&entry->list);
9033                 free(entry);
9034         }
9035         return ret;
9036 }
9037
9038 static int process_duplicates(struct cache_tree *extent_cache,
9039                               struct extent_record *rec)
9040 {
9041         struct extent_record *good, *tmp;
9042         struct cache_extent *cache;
9043         int ret;
9044
9045         /*
9046          * If we found a extent record for this extent then return, or if we
9047          * have more than one duplicate we are likely going to need to delete
9048          * something.
9049          */
9050         if (rec->found_rec || rec->num_duplicates > 1)
9051                 return 0;
9052
9053         /* Shouldn't happen but just in case */
9054         BUG_ON(!rec->num_duplicates);
9055
9056         /*
9057          * So this happens if we end up with a backref that doesn't match the
9058          * actual extent entry.  So either the backref is bad or the extent
9059          * entry is bad.  Either way we want to have the extent_record actually
9060          * reflect what we found in the extent_tree, so we need to take the
9061          * duplicate out and use that as the extent_record since the only way we
9062          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9063          */
9064         remove_cache_extent(extent_cache, &rec->cache);
9065
9066         good = to_extent_record(rec->dups.next);
9067         list_del_init(&good->list);
9068         INIT_LIST_HEAD(&good->backrefs);
9069         INIT_LIST_HEAD(&good->dups);
9070         good->cache.start = good->start;
9071         good->cache.size = good->nr;
9072         good->content_checked = 0;
9073         good->owner_ref_checked = 0;
9074         good->num_duplicates = 0;
9075         good->refs = rec->refs;
9076         list_splice_init(&rec->backrefs, &good->backrefs);
9077         while (1) {
9078                 cache = lookup_cache_extent(extent_cache, good->start,
9079                                             good->nr);
9080                 if (!cache)
9081                         break;
9082                 tmp = container_of(cache, struct extent_record, cache);
9083
9084                 /*
9085                  * If we find another overlapping extent and it's found_rec is
9086                  * set then it's a duplicate and we need to try and delete
9087                  * something.
9088                  */
9089                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9090                         if (list_empty(&good->list))
9091                                 list_add_tail(&good->list,
9092                                               &duplicate_extents);
9093                         good->num_duplicates += tmp->num_duplicates + 1;
9094                         list_splice_init(&tmp->dups, &good->dups);
9095                         list_del_init(&tmp->list);
9096                         list_add_tail(&tmp->list, &good->dups);
9097                         remove_cache_extent(extent_cache, &tmp->cache);
9098                         continue;
9099                 }
9100
9101                 /*
9102                  * Ok we have another non extent item backed extent rec, so lets
9103                  * just add it to this extent and carry on like we did above.
9104                  */
9105                 good->refs += tmp->refs;
9106                 list_splice_init(&tmp->backrefs, &good->backrefs);
9107                 remove_cache_extent(extent_cache, &tmp->cache);
9108                 free(tmp);
9109         }
9110         ret = insert_cache_extent(extent_cache, &good->cache);
9111         BUG_ON(ret);
9112         free(rec);
9113         return good->num_duplicates ? 0 : 1;
9114 }
9115
9116 static int delete_duplicate_records(struct btrfs_root *root,
9117                                     struct extent_record *rec)
9118 {
9119         struct btrfs_trans_handle *trans;
9120         LIST_HEAD(delete_list);
9121         struct btrfs_path path;
9122         struct extent_record *tmp, *good, *n;
9123         int nr_del = 0;
9124         int ret = 0, err;
9125         struct btrfs_key key;
9126
9127         btrfs_init_path(&path);
9128
9129         good = rec;
9130         /* Find the record that covers all of the duplicates. */
9131         list_for_each_entry(tmp, &rec->dups, list) {
9132                 if (good->start < tmp->start)
9133                         continue;
9134                 if (good->nr > tmp->nr)
9135                         continue;
9136
9137                 if (tmp->start + tmp->nr < good->start + good->nr) {
9138                         fprintf(stderr, "Ok we have overlapping extents that "
9139                                 "aren't completely covered by each other, this "
9140                                 "is going to require more careful thought.  "
9141                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9142                                 tmp->start, tmp->nr, good->start, good->nr);
9143                         abort();
9144                 }
9145                 good = tmp;
9146         }
9147
9148         if (good != rec)
9149                 list_add_tail(&rec->list, &delete_list);
9150
9151         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9152                 if (tmp == good)
9153                         continue;
9154                 list_move_tail(&tmp->list, &delete_list);
9155         }
9156
9157         root = root->fs_info->extent_root;
9158         trans = btrfs_start_transaction(root, 1);
9159         if (IS_ERR(trans)) {
9160                 ret = PTR_ERR(trans);
9161                 goto out;
9162         }
9163
9164         list_for_each_entry(tmp, &delete_list, list) {
9165                 if (tmp->found_rec == 0)
9166                         continue;
9167                 key.objectid = tmp->start;
9168                 key.type = BTRFS_EXTENT_ITEM_KEY;
9169                 key.offset = tmp->nr;
9170
9171                 /* Shouldn't happen but just in case */
9172                 if (tmp->metadata) {
9173                         fprintf(stderr, "Well this shouldn't happen, extent "
9174                                 "record overlaps but is metadata? "
9175                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9176                         abort();
9177                 }
9178
9179                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9180                 if (ret) {
9181                         if (ret > 0)
9182                                 ret = -EINVAL;
9183                         break;
9184                 }
9185                 ret = btrfs_del_item(trans, root, &path);
9186                 if (ret)
9187                         break;
9188                 btrfs_release_path(&path);
9189                 nr_del++;
9190         }
9191         err = btrfs_commit_transaction(trans, root);
9192         if (err && !ret)
9193                 ret = err;
9194 out:
9195         while (!list_empty(&delete_list)) {
9196                 tmp = to_extent_record(delete_list.next);
9197                 list_del_init(&tmp->list);
9198                 if (tmp == rec)
9199                         continue;
9200                 free(tmp);
9201         }
9202
9203         while (!list_empty(&rec->dups)) {
9204                 tmp = to_extent_record(rec->dups.next);
9205                 list_del_init(&tmp->list);
9206                 free(tmp);
9207         }
9208
9209         btrfs_release_path(&path);
9210
9211         if (!ret && !nr_del)
9212                 rec->num_duplicates = 0;
9213
9214         return ret ? ret : nr_del;
9215 }
9216
9217 static int find_possible_backrefs(struct btrfs_fs_info *info,
9218                                   struct btrfs_path *path,
9219                                   struct cache_tree *extent_cache,
9220                                   struct extent_record *rec)
9221 {
9222         struct btrfs_root *root;
9223         struct extent_backref *back, *tmp;
9224         struct data_backref *dback;
9225         struct cache_extent *cache;
9226         struct btrfs_file_extent_item *fi;
9227         struct btrfs_key key;
9228         u64 bytenr, bytes;
9229         int ret;
9230
9231         rbtree_postorder_for_each_entry_safe(back, tmp,
9232                                              &rec->backref_tree, node) {
9233                 /* Don't care about full backrefs (poor unloved backrefs) */
9234                 if (back->full_backref || !back->is_data)
9235                         continue;
9236
9237                 dback = to_data_backref(back);
9238
9239                 /* We found this one, we don't need to do a lookup */
9240                 if (dback->found_ref)
9241                         continue;
9242
9243                 key.objectid = dback->root;
9244                 key.type = BTRFS_ROOT_ITEM_KEY;
9245                 key.offset = (u64)-1;
9246
9247                 root = btrfs_read_fs_root(info, &key);
9248
9249                 /* No root, definitely a bad ref, skip */
9250                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9251                         continue;
9252                 /* Other err, exit */
9253                 if (IS_ERR(root))
9254                         return PTR_ERR(root);
9255
9256                 key.objectid = dback->owner;
9257                 key.type = BTRFS_EXTENT_DATA_KEY;
9258                 key.offset = dback->offset;
9259                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9260                 if (ret) {
9261                         btrfs_release_path(path);
9262                         if (ret < 0)
9263                                 return ret;
9264                         /* Didn't find it, we can carry on */
9265                         ret = 0;
9266                         continue;
9267                 }
9268
9269                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9270                                     struct btrfs_file_extent_item);
9271                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9272                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9273                 btrfs_release_path(path);
9274                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9275                 if (cache) {
9276                         struct extent_record *tmp;
9277                         tmp = container_of(cache, struct extent_record, cache);
9278
9279                         /*
9280                          * If we found an extent record for the bytenr for this
9281                          * particular backref then we can't add it to our
9282                          * current extent record.  We only want to add backrefs
9283                          * that don't have a corresponding extent item in the
9284                          * extent tree since they likely belong to this record
9285                          * and we need to fix it if it doesn't match bytenrs.
9286                          */
9287                         if  (tmp->found_rec)
9288                                 continue;
9289                 }
9290
9291                 dback->found_ref += 1;
9292                 dback->disk_bytenr = bytenr;
9293                 dback->bytes = bytes;
9294
9295                 /*
9296                  * Set this so the verify backref code knows not to trust the
9297                  * values in this backref.
9298                  */
9299                 back->broken = 1;
9300         }
9301
9302         return 0;
9303 }
9304
9305 /*
9306  * Record orphan data ref into corresponding root.
9307  *
9308  * Return 0 if the extent item contains data ref and recorded.
9309  * Return 1 if the extent item contains no useful data ref
9310  *   On that case, it may contains only shared_dataref or metadata backref
9311  *   or the file extent exists(this should be handled by the extent bytenr
9312  *   recovery routine)
9313  * Return <0 if something goes wrong.
9314  */
9315 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9316                                       struct extent_record *rec)
9317 {
9318         struct btrfs_key key;
9319         struct btrfs_root *dest_root;
9320         struct extent_backref *back, *tmp;
9321         struct data_backref *dback;
9322         struct orphan_data_extent *orphan;
9323         struct btrfs_path path;
9324         int recorded_data_ref = 0;
9325         int ret = 0;
9326
9327         if (rec->metadata)
9328                 return 1;
9329         btrfs_init_path(&path);
9330         rbtree_postorder_for_each_entry_safe(back, tmp,
9331                                              &rec->backref_tree, node) {
9332                 if (back->full_backref || !back->is_data ||
9333                     !back->found_extent_tree)
9334                         continue;
9335                 dback = to_data_backref(back);
9336                 if (dback->found_ref)
9337                         continue;
9338                 key.objectid = dback->root;
9339                 key.type = BTRFS_ROOT_ITEM_KEY;
9340                 key.offset = (u64)-1;
9341
9342                 dest_root = btrfs_read_fs_root(fs_info, &key);
9343
9344                 /* For non-exist root we just skip it */
9345                 if (IS_ERR(dest_root) || !dest_root)
9346                         continue;
9347
9348                 key.objectid = dback->owner;
9349                 key.type = BTRFS_EXTENT_DATA_KEY;
9350                 key.offset = dback->offset;
9351
9352                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9353                 btrfs_release_path(&path);
9354                 /*
9355                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9356                  * we need to record it for inode/file extent rebuild.
9357                  * For ret > 0, we record it only for file extent rebuild.
9358                  * For ret == 0, the file extent exists but only bytenr
9359                  * mismatch, let the original bytenr fix routine to handle,
9360                  * don't record it.
9361                  */
9362                 if (ret == 0)
9363                         continue;
9364                 ret = 0;
9365                 orphan = malloc(sizeof(*orphan));
9366                 if (!orphan) {
9367                         ret = -ENOMEM;
9368                         goto out;
9369                 }
9370                 INIT_LIST_HEAD(&orphan->list);
9371                 orphan->root = dback->root;
9372                 orphan->objectid = dback->owner;
9373                 orphan->offset = dback->offset;
9374                 orphan->disk_bytenr = rec->cache.start;
9375                 orphan->disk_len = rec->cache.size;
9376                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9377                 recorded_data_ref = 1;
9378         }
9379 out:
9380         btrfs_release_path(&path);
9381         if (!ret)
9382                 return !recorded_data_ref;
9383         else
9384                 return ret;
9385 }
9386
9387 /*
9388  * when an incorrect extent item is found, this will delete
9389  * all of the existing entries for it and recreate them
9390  * based on what the tree scan found.
9391  */
9392 static int fixup_extent_refs(struct btrfs_fs_info *info,
9393                              struct cache_tree *extent_cache,
9394                              struct extent_record *rec)
9395 {
9396         struct btrfs_trans_handle *trans = NULL;
9397         int ret;
9398         struct btrfs_path path;
9399         struct cache_extent *cache;
9400         struct extent_backref *back, *tmp;
9401         int allocated = 0;
9402         u64 flags = 0;
9403
9404         if (rec->flag_block_full_backref)
9405                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9406
9407         btrfs_init_path(&path);
9408         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9409                 /*
9410                  * Sometimes the backrefs themselves are so broken they don't
9411                  * get attached to any meaningful rec, so first go back and
9412                  * check any of our backrefs that we couldn't find and throw
9413                  * them into the list if we find the backref so that
9414                  * verify_backrefs can figure out what to do.
9415                  */
9416                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9417                 if (ret < 0)
9418                         goto out;
9419         }
9420
9421         /* step one, make sure all of the backrefs agree */
9422         ret = verify_backrefs(info, &path, rec);
9423         if (ret < 0)
9424                 goto out;
9425
9426         trans = btrfs_start_transaction(info->extent_root, 1);
9427         if (IS_ERR(trans)) {
9428                 ret = PTR_ERR(trans);
9429                 goto out;
9430         }
9431
9432         /* step two, delete all the existing records */
9433         ret = delete_extent_records(trans, info->extent_root, &path,
9434                                     rec->start);
9435
9436         if (ret < 0)
9437                 goto out;
9438
9439         /* was this block corrupt?  If so, don't add references to it */
9440         cache = lookup_cache_extent(info->corrupt_blocks,
9441                                     rec->start, rec->max_size);
9442         if (cache) {
9443                 ret = 0;
9444                 goto out;
9445         }
9446
9447         /* step three, recreate all the refs we did find */
9448         rbtree_postorder_for_each_entry_safe(back, tmp,
9449                                              &rec->backref_tree, node) {
9450                 /*
9451                  * if we didn't find any references, don't create a
9452                  * new extent record
9453                  */
9454                 if (!back->found_ref)
9455                         continue;
9456
9457                 rec->bad_full_backref = 0;
9458                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9459                 allocated = 1;
9460
9461                 if (ret)
9462                         goto out;
9463         }
9464 out:
9465         if (trans) {
9466                 int err = btrfs_commit_transaction(trans, info->extent_root);
9467                 if (!ret)
9468                         ret = err;
9469         }
9470
9471         if (!ret)
9472                 fprintf(stderr, "Repaired extent references for %llu\n",
9473                                 (unsigned long long)rec->start);
9474
9475         btrfs_release_path(&path);
9476         return ret;
9477 }
9478
9479 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9480                               struct extent_record *rec)
9481 {
9482         struct btrfs_trans_handle *trans;
9483         struct btrfs_root *root = fs_info->extent_root;
9484         struct btrfs_path path;
9485         struct btrfs_extent_item *ei;
9486         struct btrfs_key key;
9487         u64 flags;
9488         int ret = 0;
9489
9490         key.objectid = rec->start;
9491         if (rec->metadata) {
9492                 key.type = BTRFS_METADATA_ITEM_KEY;
9493                 key.offset = rec->info_level;
9494         } else {
9495                 key.type = BTRFS_EXTENT_ITEM_KEY;
9496                 key.offset = rec->max_size;
9497         }
9498
9499         trans = btrfs_start_transaction(root, 0);
9500         if (IS_ERR(trans))
9501                 return PTR_ERR(trans);
9502
9503         btrfs_init_path(&path);
9504         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9505         if (ret < 0) {
9506                 btrfs_release_path(&path);
9507                 btrfs_commit_transaction(trans, root);
9508                 return ret;
9509         } else if (ret) {
9510                 fprintf(stderr, "Didn't find extent for %llu\n",
9511                         (unsigned long long)rec->start);
9512                 btrfs_release_path(&path);
9513                 btrfs_commit_transaction(trans, root);
9514                 return -ENOENT;
9515         }
9516
9517         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9518                             struct btrfs_extent_item);
9519         flags = btrfs_extent_flags(path.nodes[0], ei);
9520         if (rec->flag_block_full_backref) {
9521                 fprintf(stderr, "setting full backref on %llu\n",
9522                         (unsigned long long)key.objectid);
9523                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9524         } else {
9525                 fprintf(stderr, "clearing full backref on %llu\n",
9526                         (unsigned long long)key.objectid);
9527                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9528         }
9529         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9530         btrfs_mark_buffer_dirty(path.nodes[0]);
9531         btrfs_release_path(&path);
9532         ret = btrfs_commit_transaction(trans, root);
9533         if (!ret)
9534                 fprintf(stderr, "Repaired extent flags for %llu\n",
9535                                 (unsigned long long)rec->start);
9536
9537         return ret;
9538 }
9539
9540 /* right now we only prune from the extent allocation tree */
9541 static int prune_one_block(struct btrfs_trans_handle *trans,
9542                            struct btrfs_fs_info *info,
9543                            struct btrfs_corrupt_block *corrupt)
9544 {
9545         int ret;
9546         struct btrfs_path path;
9547         struct extent_buffer *eb;
9548         u64 found;
9549         int slot;
9550         int nritems;
9551         int level = corrupt->level + 1;
9552
9553         btrfs_init_path(&path);
9554 again:
9555         /* we want to stop at the parent to our busted block */
9556         path.lowest_level = level;
9557
9558         ret = btrfs_search_slot(trans, info->extent_root,
9559                                 &corrupt->key, &path, -1, 1);
9560
9561         if (ret < 0)
9562                 goto out;
9563
9564         eb = path.nodes[level];
9565         if (!eb) {
9566                 ret = -ENOENT;
9567                 goto out;
9568         }
9569
9570         /*
9571          * hopefully the search gave us the block we want to prune,
9572          * lets try that first
9573          */
9574         slot = path.slots[level];
9575         found =  btrfs_node_blockptr(eb, slot);
9576         if (found == corrupt->cache.start)
9577                 goto del_ptr;
9578
9579         nritems = btrfs_header_nritems(eb);
9580
9581         /* the search failed, lets scan this node and hope we find it */
9582         for (slot = 0; slot < nritems; slot++) {
9583                 found =  btrfs_node_blockptr(eb, slot);
9584                 if (found == corrupt->cache.start)
9585                         goto del_ptr;
9586         }
9587         /*
9588          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9589          * to this block
9590          */
9591         if (eb == info->extent_root->node) {
9592                 ret = -ENOENT;
9593                 goto out;
9594         } else {
9595                 level++;
9596                 btrfs_release_path(&path);
9597                 goto again;
9598         }
9599
9600 del_ptr:
9601         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9602         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9603
9604 out:
9605         btrfs_release_path(&path);
9606         return ret;
9607 }
9608
9609 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9610 {
9611         struct btrfs_trans_handle *trans = NULL;
9612         struct cache_extent *cache;
9613         struct btrfs_corrupt_block *corrupt;
9614
9615         while (1) {
9616                 cache = search_cache_extent(info->corrupt_blocks, 0);
9617                 if (!cache)
9618                         break;
9619                 if (!trans) {
9620                         trans = btrfs_start_transaction(info->extent_root, 1);
9621                         if (IS_ERR(trans))
9622                                 return PTR_ERR(trans);
9623                 }
9624                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9625                 prune_one_block(trans, info, corrupt);
9626                 remove_cache_extent(info->corrupt_blocks, cache);
9627         }
9628         if (trans)
9629                 return btrfs_commit_transaction(trans, info->extent_root);
9630         return 0;
9631 }
9632
9633 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9634 {
9635         struct btrfs_block_group_cache *cache;
9636         u64 start, end;
9637         int ret;
9638
9639         while (1) {
9640                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9641                                             &start, &end, EXTENT_DIRTY);
9642                 if (ret)
9643                         break;
9644                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9645         }
9646
9647         start = 0;
9648         while (1) {
9649                 cache = btrfs_lookup_first_block_group(fs_info, start);
9650                 if (!cache)
9651                         break;
9652                 if (cache->cached)
9653                         cache->cached = 0;
9654                 start = cache->key.objectid + cache->key.offset;
9655         }
9656 }
9657
9658 static int check_extent_refs(struct btrfs_root *root,
9659                              struct cache_tree *extent_cache)
9660 {
9661         struct extent_record *rec;
9662         struct cache_extent *cache;
9663         int ret = 0;
9664         int had_dups = 0;
9665
9666         if (repair) {
9667                 /*
9668                  * if we're doing a repair, we have to make sure
9669                  * we don't allocate from the problem extents.
9670                  * In the worst case, this will be all the
9671                  * extents in the FS
9672                  */
9673                 cache = search_cache_extent(extent_cache, 0);
9674                 while(cache) {
9675                         rec = container_of(cache, struct extent_record, cache);
9676                         set_extent_dirty(root->fs_info->excluded_extents,
9677                                          rec->start,
9678                                          rec->start + rec->max_size - 1);
9679                         cache = next_cache_extent(cache);
9680                 }
9681
9682                 /* pin down all the corrupted blocks too */
9683                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9684                 while(cache) {
9685                         set_extent_dirty(root->fs_info->excluded_extents,
9686                                          cache->start,
9687                                          cache->start + cache->size - 1);
9688                         cache = next_cache_extent(cache);
9689                 }
9690                 prune_corrupt_blocks(root->fs_info);
9691                 reset_cached_block_groups(root->fs_info);
9692         }
9693
9694         reset_cached_block_groups(root->fs_info);
9695
9696         /*
9697          * We need to delete any duplicate entries we find first otherwise we
9698          * could mess up the extent tree when we have backrefs that actually
9699          * belong to a different extent item and not the weird duplicate one.
9700          */
9701         while (repair && !list_empty(&duplicate_extents)) {
9702                 rec = to_extent_record(duplicate_extents.next);
9703                 list_del_init(&rec->list);
9704
9705                 /* Sometimes we can find a backref before we find an actual
9706                  * extent, so we need to process it a little bit to see if there
9707                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9708                  * if this is a backref screwup.  If we need to delete stuff
9709                  * process_duplicates() will return 0, otherwise it will return
9710                  * 1 and we
9711                  */
9712                 if (process_duplicates(extent_cache, rec))
9713                         continue;
9714                 ret = delete_duplicate_records(root, rec);
9715                 if (ret < 0)
9716                         return ret;
9717                 /*
9718                  * delete_duplicate_records will return the number of entries
9719                  * deleted, so if it's greater than 0 then we know we actually
9720                  * did something and we need to remove.
9721                  */
9722                 if (ret)
9723                         had_dups = 1;
9724         }
9725
9726         if (had_dups)
9727                 return -EAGAIN;
9728
9729         while(1) {
9730                 int cur_err = 0;
9731                 int fix = 0;
9732
9733                 cache = search_cache_extent(extent_cache, 0);
9734                 if (!cache)
9735                         break;
9736                 rec = container_of(cache, struct extent_record, cache);
9737                 if (rec->num_duplicates) {
9738                         fprintf(stderr, "extent item %llu has multiple extent "
9739                                 "items\n", (unsigned long long)rec->start);
9740                         cur_err = 1;
9741                 }
9742
9743                 if (rec->refs != rec->extent_item_refs) {
9744                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9745                                 (unsigned long long)rec->start,
9746                                 (unsigned long long)rec->nr);
9747                         fprintf(stderr, "extent item %llu, found %llu\n",
9748                                 (unsigned long long)rec->extent_item_refs,
9749                                 (unsigned long long)rec->refs);
9750                         ret = record_orphan_data_extents(root->fs_info, rec);
9751                         if (ret < 0)
9752                                 goto repair_abort;
9753                         fix = ret;
9754                         cur_err = 1;
9755                 }
9756                 if (all_backpointers_checked(rec, 1)) {
9757                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9758                                 (unsigned long long)rec->start,
9759                                 (unsigned long long)rec->nr);
9760                         fix = 1;
9761                         cur_err = 1;
9762                 }
9763                 if (!rec->owner_ref_checked) {
9764                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9765                                 (unsigned long long)rec->start,
9766                                 (unsigned long long)rec->nr);
9767                         fix = 1;
9768                         cur_err = 1;
9769                 }
9770
9771                 if (repair && fix) {
9772                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9773                         if (ret)
9774                                 goto repair_abort;
9775                 }
9776
9777
9778                 if (rec->bad_full_backref) {
9779                         fprintf(stderr, "bad full backref, on [%llu]\n",
9780                                 (unsigned long long)rec->start);
9781                         if (repair) {
9782                                 ret = fixup_extent_flags(root->fs_info, rec);
9783                                 if (ret)
9784                                         goto repair_abort;
9785                                 fix = 1;
9786                         }
9787                         cur_err = 1;
9788                 }
9789                 /*
9790                  * Although it's not a extent ref's problem, we reuse this
9791                  * routine for error reporting.
9792                  * No repair function yet.
9793                  */
9794                 if (rec->crossing_stripes) {
9795                         fprintf(stderr,
9796                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9797                                 rec->start, rec->start + rec->max_size);
9798                         cur_err = 1;
9799                 }
9800
9801                 if (rec->wrong_chunk_type) {
9802                         fprintf(stderr,
9803                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9804                                 rec->start, rec->start + rec->max_size);
9805                         cur_err = 1;
9806                 }
9807
9808                 remove_cache_extent(extent_cache, cache);
9809                 free_all_extent_backrefs(rec);
9810                 if (!init_extent_tree && repair && (!cur_err || fix))
9811                         clear_extent_dirty(root->fs_info->excluded_extents,
9812                                            rec->start,
9813                                            rec->start + rec->max_size - 1);
9814                 free(rec);
9815         }
9816 repair_abort:
9817         if (repair) {
9818                 if (ret && ret != -EAGAIN) {
9819                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9820                         exit(1);
9821                 } else if (!ret) {
9822                         struct btrfs_trans_handle *trans;
9823
9824                         root = root->fs_info->extent_root;
9825                         trans = btrfs_start_transaction(root, 1);
9826                         if (IS_ERR(trans)) {
9827                                 ret = PTR_ERR(trans);
9828                                 goto repair_abort;
9829                         }
9830
9831                         ret = btrfs_fix_block_accounting(trans, root);
9832                         if (ret)
9833                                 goto repair_abort;
9834                         ret = btrfs_commit_transaction(trans, root);
9835                         if (ret)
9836                                 goto repair_abort;
9837                 }
9838                 return ret;
9839         }
9840         return 0;
9841 }
9842
9843 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9844 {
9845         u64 stripe_size;
9846
9847         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9848                 stripe_size = length;
9849                 stripe_size /= num_stripes;
9850         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9851                 stripe_size = length * 2;
9852                 stripe_size /= num_stripes;
9853         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9854                 stripe_size = length;
9855                 stripe_size /= (num_stripes - 1);
9856         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9857                 stripe_size = length;
9858                 stripe_size /= (num_stripes - 2);
9859         } else {
9860                 stripe_size = length;
9861         }
9862         return stripe_size;
9863 }
9864
9865 /*
9866  * Check the chunk with its block group/dev list ref:
9867  * Return 0 if all refs seems valid.
9868  * Return 1 if part of refs seems valid, need later check for rebuild ref
9869  * like missing block group and needs to search extent tree to rebuild them.
9870  * Return -1 if essential refs are missing and unable to rebuild.
9871  */
9872 static int check_chunk_refs(struct chunk_record *chunk_rec,
9873                             struct block_group_tree *block_group_cache,
9874                             struct device_extent_tree *dev_extent_cache,
9875                             int silent)
9876 {
9877         struct cache_extent *block_group_item;
9878         struct block_group_record *block_group_rec;
9879         struct cache_extent *dev_extent_item;
9880         struct device_extent_record *dev_extent_rec;
9881         u64 devid;
9882         u64 offset;
9883         u64 length;
9884         int metadump_v2 = 0;
9885         int i;
9886         int ret = 0;
9887
9888         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9889                                                chunk_rec->offset,
9890                                                chunk_rec->length);
9891         if (block_group_item) {
9892                 block_group_rec = container_of(block_group_item,
9893                                                struct block_group_record,
9894                                                cache);
9895                 if (chunk_rec->length != block_group_rec->offset ||
9896                     chunk_rec->offset != block_group_rec->objectid ||
9897                     (!metadump_v2 &&
9898                      chunk_rec->type_flags != block_group_rec->flags)) {
9899                         if (!silent)
9900                                 fprintf(stderr,
9901                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9902                                         chunk_rec->objectid,
9903                                         chunk_rec->type,
9904                                         chunk_rec->offset,
9905                                         chunk_rec->length,
9906                                         chunk_rec->offset,
9907                                         chunk_rec->type_flags,
9908                                         block_group_rec->objectid,
9909                                         block_group_rec->type,
9910                                         block_group_rec->offset,
9911                                         block_group_rec->offset,
9912                                         block_group_rec->objectid,
9913                                         block_group_rec->flags);
9914                         ret = -1;
9915                 } else {
9916                         list_del_init(&block_group_rec->list);
9917                         chunk_rec->bg_rec = block_group_rec;
9918                 }
9919         } else {
9920                 if (!silent)
9921                         fprintf(stderr,
9922                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9923                                 chunk_rec->objectid,
9924                                 chunk_rec->type,
9925                                 chunk_rec->offset,
9926                                 chunk_rec->length,
9927                                 chunk_rec->offset,
9928                                 chunk_rec->type_flags);
9929                 ret = 1;
9930         }
9931
9932         if (metadump_v2)
9933                 return ret;
9934
9935         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9936                                     chunk_rec->num_stripes);
9937         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9938                 devid = chunk_rec->stripes[i].devid;
9939                 offset = chunk_rec->stripes[i].offset;
9940                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9941                                                        devid, offset, length);
9942                 if (dev_extent_item) {
9943                         dev_extent_rec = container_of(dev_extent_item,
9944                                                 struct device_extent_record,
9945                                                 cache);
9946                         if (dev_extent_rec->objectid != devid ||
9947                             dev_extent_rec->offset != offset ||
9948                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9949                             dev_extent_rec->length != length) {
9950                                 if (!silent)
9951                                         fprintf(stderr,
9952                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9953                                                 chunk_rec->objectid,
9954                                                 chunk_rec->type,
9955                                                 chunk_rec->offset,
9956                                                 chunk_rec->stripes[i].devid,
9957                                                 chunk_rec->stripes[i].offset,
9958                                                 dev_extent_rec->objectid,
9959                                                 dev_extent_rec->offset,
9960                                                 dev_extent_rec->length);
9961                                 ret = -1;
9962                         } else {
9963                                 list_move(&dev_extent_rec->chunk_list,
9964                                           &chunk_rec->dextents);
9965                         }
9966                 } else {
9967                         if (!silent)
9968                                 fprintf(stderr,
9969                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9970                                         chunk_rec->objectid,
9971                                         chunk_rec->type,
9972                                         chunk_rec->offset,
9973                                         chunk_rec->stripes[i].devid,
9974                                         chunk_rec->stripes[i].offset);
9975                         ret = -1;
9976                 }
9977         }
9978         return ret;
9979 }
9980
9981 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9982 int check_chunks(struct cache_tree *chunk_cache,
9983                  struct block_group_tree *block_group_cache,
9984                  struct device_extent_tree *dev_extent_cache,
9985                  struct list_head *good, struct list_head *bad,
9986                  struct list_head *rebuild, int silent)
9987 {
9988         struct cache_extent *chunk_item;
9989         struct chunk_record *chunk_rec;
9990         struct block_group_record *bg_rec;
9991         struct device_extent_record *dext_rec;
9992         int err;
9993         int ret = 0;
9994
9995         chunk_item = first_cache_extent(chunk_cache);
9996         while (chunk_item) {
9997                 chunk_rec = container_of(chunk_item, struct chunk_record,
9998                                          cache);
9999                 err = check_chunk_refs(chunk_rec, block_group_cache,
10000                                        dev_extent_cache, silent);
10001                 if (err < 0)
10002                         ret = err;
10003                 if (err == 0 && good)
10004                         list_add_tail(&chunk_rec->list, good);
10005                 if (err > 0 && rebuild)
10006                         list_add_tail(&chunk_rec->list, rebuild);
10007                 if (err < 0 && bad)
10008                         list_add_tail(&chunk_rec->list, bad);
10009                 chunk_item = next_cache_extent(chunk_item);
10010         }
10011
10012         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10013                 if (!silent)
10014                         fprintf(stderr,
10015                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10016                                 bg_rec->objectid,
10017                                 bg_rec->offset,
10018                                 bg_rec->flags);
10019                 if (!ret)
10020                         ret = 1;
10021         }
10022
10023         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10024                             chunk_list) {
10025                 if (!silent)
10026                         fprintf(stderr,
10027                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10028                                 dext_rec->objectid,
10029                                 dext_rec->offset,
10030                                 dext_rec->length);
10031                 if (!ret)
10032                         ret = 1;
10033         }
10034         return ret;
10035 }
10036
10037
10038 static int check_device_used(struct device_record *dev_rec,
10039                              struct device_extent_tree *dext_cache)
10040 {
10041         struct cache_extent *cache;
10042         struct device_extent_record *dev_extent_rec;
10043         u64 total_byte = 0;
10044
10045         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10046         while (cache) {
10047                 dev_extent_rec = container_of(cache,
10048                                               struct device_extent_record,
10049                                               cache);
10050                 if (dev_extent_rec->objectid != dev_rec->devid)
10051                         break;
10052
10053                 list_del_init(&dev_extent_rec->device_list);
10054                 total_byte += dev_extent_rec->length;
10055                 cache = next_cache_extent(cache);
10056         }
10057
10058         if (total_byte != dev_rec->byte_used) {
10059                 fprintf(stderr,
10060                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10061                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10062                         dev_rec->type, dev_rec->offset);
10063                 return -1;
10064         } else {
10065                 return 0;
10066         }
10067 }
10068
10069 /* check btrfs_dev_item -> btrfs_dev_extent */
10070 static int check_devices(struct rb_root *dev_cache,
10071                          struct device_extent_tree *dev_extent_cache)
10072 {
10073         struct rb_node *dev_node;
10074         struct device_record *dev_rec;
10075         struct device_extent_record *dext_rec;
10076         int err;
10077         int ret = 0;
10078
10079         dev_node = rb_first(dev_cache);
10080         while (dev_node) {
10081                 dev_rec = container_of(dev_node, struct device_record, node);
10082                 err = check_device_used(dev_rec, dev_extent_cache);
10083                 if (err)
10084                         ret = err;
10085
10086                 dev_node = rb_next(dev_node);
10087         }
10088         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10089                             device_list) {
10090                 fprintf(stderr,
10091                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10092                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10093                 if (!ret)
10094                         ret = 1;
10095         }
10096         return ret;
10097 }
10098
10099 static int add_root_item_to_list(struct list_head *head,
10100                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10101                                   u8 level, u8 drop_level,
10102                                   struct btrfs_key *drop_key)
10103 {
10104
10105         struct root_item_record *ri_rec;
10106         ri_rec = malloc(sizeof(*ri_rec));
10107         if (!ri_rec)
10108                 return -ENOMEM;
10109         ri_rec->bytenr = bytenr;
10110         ri_rec->objectid = objectid;
10111         ri_rec->level = level;
10112         ri_rec->drop_level = drop_level;
10113         ri_rec->last_snapshot = last_snapshot;
10114         if (drop_key)
10115                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10116         list_add_tail(&ri_rec->list, head);
10117
10118         return 0;
10119 }
10120
10121 static void free_root_item_list(struct list_head *list)
10122 {
10123         struct root_item_record *ri_rec;
10124
10125         while (!list_empty(list)) {
10126                 ri_rec = list_first_entry(list, struct root_item_record,
10127                                           list);
10128                 list_del_init(&ri_rec->list);
10129                 free(ri_rec);
10130         }
10131 }
10132
10133 static int deal_root_from_list(struct list_head *list,
10134                                struct btrfs_root *root,
10135                                struct block_info *bits,
10136                                int bits_nr,
10137                                struct cache_tree *pending,
10138                                struct cache_tree *seen,
10139                                struct cache_tree *reada,
10140                                struct cache_tree *nodes,
10141                                struct cache_tree *extent_cache,
10142                                struct cache_tree *chunk_cache,
10143                                struct rb_root *dev_cache,
10144                                struct block_group_tree *block_group_cache,
10145                                struct device_extent_tree *dev_extent_cache)
10146 {
10147         int ret = 0;
10148         u64 last;
10149
10150         while (!list_empty(list)) {
10151                 struct root_item_record *rec;
10152                 struct extent_buffer *buf;
10153                 rec = list_entry(list->next,
10154                                  struct root_item_record, list);
10155                 last = 0;
10156                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10157                 if (!extent_buffer_uptodate(buf)) {
10158                         free_extent_buffer(buf);
10159                         ret = -EIO;
10160                         break;
10161                 }
10162                 ret = add_root_to_pending(buf, extent_cache, pending,
10163                                     seen, nodes, rec->objectid);
10164                 if (ret < 0)
10165                         break;
10166                 /*
10167                  * To rebuild extent tree, we need deal with snapshot
10168                  * one by one, otherwise we deal with node firstly which
10169                  * can maximize readahead.
10170                  */
10171                 while (1) {
10172                         ret = run_next_block(root, bits, bits_nr, &last,
10173                                              pending, seen, reada, nodes,
10174                                              extent_cache, chunk_cache,
10175                                              dev_cache, block_group_cache,
10176                                              dev_extent_cache, rec);
10177                         if (ret != 0)
10178                                 break;
10179                 }
10180                 free_extent_buffer(buf);
10181                 list_del(&rec->list);
10182                 free(rec);
10183                 if (ret < 0)
10184                         break;
10185         }
10186         while (ret >= 0) {
10187                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10188                                      reada, nodes, extent_cache, chunk_cache,
10189                                      dev_cache, block_group_cache,
10190                                      dev_extent_cache, NULL);
10191                 if (ret != 0) {
10192                         if (ret > 0)
10193                                 ret = 0;
10194                         break;
10195                 }
10196         }
10197         return ret;
10198 }
10199
10200 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10201 {
10202         struct rb_root dev_cache;
10203         struct cache_tree chunk_cache;
10204         struct block_group_tree block_group_cache;
10205         struct device_extent_tree dev_extent_cache;
10206         struct cache_tree extent_cache;
10207         struct cache_tree seen;
10208         struct cache_tree pending;
10209         struct cache_tree reada;
10210         struct cache_tree nodes;
10211         struct extent_io_tree excluded_extents;
10212         struct cache_tree corrupt_blocks;
10213         struct btrfs_path path;
10214         struct btrfs_key key;
10215         struct btrfs_key found_key;
10216         int ret, err = 0;
10217         struct block_info *bits;
10218         int bits_nr;
10219         struct extent_buffer *leaf;
10220         int slot;
10221         struct btrfs_root_item ri;
10222         struct list_head dropping_trees;
10223         struct list_head normal_trees;
10224         struct btrfs_root *root1;
10225         struct btrfs_root *root;
10226         u64 objectid;
10227         u8 level;
10228
10229         root = fs_info->fs_root;
10230         dev_cache = RB_ROOT;
10231         cache_tree_init(&chunk_cache);
10232         block_group_tree_init(&block_group_cache);
10233         device_extent_tree_init(&dev_extent_cache);
10234
10235         cache_tree_init(&extent_cache);
10236         cache_tree_init(&seen);
10237         cache_tree_init(&pending);
10238         cache_tree_init(&nodes);
10239         cache_tree_init(&reada);
10240         cache_tree_init(&corrupt_blocks);
10241         extent_io_tree_init(&excluded_extents);
10242         INIT_LIST_HEAD(&dropping_trees);
10243         INIT_LIST_HEAD(&normal_trees);
10244
10245         if (repair) {
10246                 fs_info->excluded_extents = &excluded_extents;
10247                 fs_info->fsck_extent_cache = &extent_cache;
10248                 fs_info->free_extent_hook = free_extent_hook;
10249                 fs_info->corrupt_blocks = &corrupt_blocks;
10250         }
10251
10252         bits_nr = 1024;
10253         bits = malloc(bits_nr * sizeof(struct block_info));
10254         if (!bits) {
10255                 perror("malloc");
10256                 exit(1);
10257         }
10258
10259         if (ctx.progress_enabled) {
10260                 ctx.tp = TASK_EXTENTS;
10261                 task_start(ctx.info);
10262         }
10263
10264 again:
10265         root1 = fs_info->tree_root;
10266         level = btrfs_header_level(root1->node);
10267         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10268                                     root1->node->start, 0, level, 0, NULL);
10269         if (ret < 0)
10270                 goto out;
10271         root1 = fs_info->chunk_root;
10272         level = btrfs_header_level(root1->node);
10273         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10274                                     root1->node->start, 0, level, 0, NULL);
10275         if (ret < 0)
10276                 goto out;
10277         btrfs_init_path(&path);
10278         key.offset = 0;
10279         key.objectid = 0;
10280         key.type = BTRFS_ROOT_ITEM_KEY;
10281         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10282         if (ret < 0)
10283                 goto out;
10284         while(1) {
10285                 leaf = path.nodes[0];
10286                 slot = path.slots[0];
10287                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10288                         ret = btrfs_next_leaf(root, &path);
10289                         if (ret != 0)
10290                                 break;
10291                         leaf = path.nodes[0];
10292                         slot = path.slots[0];
10293                 }
10294                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10295                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10296                         unsigned long offset;
10297                         u64 last_snapshot;
10298
10299                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10300                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10301                         last_snapshot = btrfs_root_last_snapshot(&ri);
10302                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10303                                 level = btrfs_root_level(&ri);
10304                                 ret = add_root_item_to_list(&normal_trees,
10305                                                 found_key.objectid,
10306                                                 btrfs_root_bytenr(&ri),
10307                                                 last_snapshot, level,
10308                                                 0, NULL);
10309                                 if (ret < 0)
10310                                         goto out;
10311                         } else {
10312                                 level = btrfs_root_level(&ri);
10313                                 objectid = found_key.objectid;
10314                                 btrfs_disk_key_to_cpu(&found_key,
10315                                                       &ri.drop_progress);
10316                                 ret = add_root_item_to_list(&dropping_trees,
10317                                                 objectid,
10318                                                 btrfs_root_bytenr(&ri),
10319                                                 last_snapshot, level,
10320                                                 ri.drop_level, &found_key);
10321                                 if (ret < 0)
10322                                         goto out;
10323                         }
10324                 }
10325                 path.slots[0]++;
10326         }
10327         btrfs_release_path(&path);
10328
10329         /*
10330          * check_block can return -EAGAIN if it fixes something, please keep
10331          * this in mind when dealing with return values from these functions, if
10332          * we get -EAGAIN we want to fall through and restart the loop.
10333          */
10334         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10335                                   &seen, &reada, &nodes, &extent_cache,
10336                                   &chunk_cache, &dev_cache, &block_group_cache,
10337                                   &dev_extent_cache);
10338         if (ret < 0) {
10339                 if (ret == -EAGAIN)
10340                         goto loop;
10341                 goto out;
10342         }
10343         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10344                                   &pending, &seen, &reada, &nodes,
10345                                   &extent_cache, &chunk_cache, &dev_cache,
10346                                   &block_group_cache, &dev_extent_cache);
10347         if (ret < 0) {
10348                 if (ret == -EAGAIN)
10349                         goto loop;
10350                 goto out;
10351         }
10352
10353         ret = check_chunks(&chunk_cache, &block_group_cache,
10354                            &dev_extent_cache, NULL, NULL, NULL, 0);
10355         if (ret) {
10356                 if (ret == -EAGAIN)
10357                         goto loop;
10358                 err = ret;
10359         }
10360
10361         ret = check_extent_refs(root, &extent_cache);
10362         if (ret < 0) {
10363                 if (ret == -EAGAIN)
10364                         goto loop;
10365                 goto out;
10366         }
10367
10368         ret = check_devices(&dev_cache, &dev_extent_cache);
10369         if (ret && err)
10370                 ret = err;
10371
10372 out:
10373         task_stop(ctx.info);
10374         if (repair) {
10375                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10376                 extent_io_tree_cleanup(&excluded_extents);
10377                 fs_info->fsck_extent_cache = NULL;
10378                 fs_info->free_extent_hook = NULL;
10379                 fs_info->corrupt_blocks = NULL;
10380                 fs_info->excluded_extents = NULL;
10381         }
10382         free(bits);
10383         free_chunk_cache_tree(&chunk_cache);
10384         free_device_cache_tree(&dev_cache);
10385         free_block_group_tree(&block_group_cache);
10386         free_device_extent_tree(&dev_extent_cache);
10387         free_extent_cache_tree(&seen);
10388         free_extent_cache_tree(&pending);
10389         free_extent_cache_tree(&reada);
10390         free_extent_cache_tree(&nodes);
10391         free_root_item_list(&normal_trees);
10392         free_root_item_list(&dropping_trees);
10393         return ret;
10394 loop:
10395         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10396         free_extent_cache_tree(&seen);
10397         free_extent_cache_tree(&pending);
10398         free_extent_cache_tree(&reada);
10399         free_extent_cache_tree(&nodes);
10400         free_chunk_cache_tree(&chunk_cache);
10401         free_block_group_tree(&block_group_cache);
10402         free_device_cache_tree(&dev_cache);
10403         free_device_extent_tree(&dev_extent_cache);
10404         free_extent_record_cache(&extent_cache);
10405         free_root_item_list(&normal_trees);
10406         free_root_item_list(&dropping_trees);
10407         extent_io_tree_cleanup(&excluded_extents);
10408         goto again;
10409 }
10410
10411 /*
10412  * Check backrefs of a tree block given by @bytenr or @eb.
10413  *
10414  * @root:       the root containing the @bytenr or @eb
10415  * @eb:         tree block extent buffer, can be NULL
10416  * @bytenr:     bytenr of the tree block to search
10417  * @level:      tree level of the tree block
10418  * @owner:      owner of the tree block
10419  *
10420  * Return >0 for any error found and output error message
10421  * Return 0 for no error found
10422  */
10423 static int check_tree_block_ref(struct btrfs_root *root,
10424                                 struct extent_buffer *eb, u64 bytenr,
10425                                 int level, u64 owner)
10426 {
10427         struct btrfs_key key;
10428         struct btrfs_root *extent_root = root->fs_info->extent_root;
10429         struct btrfs_path path;
10430         struct btrfs_extent_item *ei;
10431         struct btrfs_extent_inline_ref *iref;
10432         struct extent_buffer *leaf;
10433         unsigned long end;
10434         unsigned long ptr;
10435         int slot;
10436         int skinny_level;
10437         int type;
10438         u32 nodesize = root->fs_info->nodesize;
10439         u32 item_size;
10440         u64 offset;
10441         int tree_reloc_root = 0;
10442         int found_ref = 0;
10443         int err = 0;
10444         int ret;
10445
10446         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10447             btrfs_header_bytenr(root->node) == bytenr)
10448                 tree_reloc_root = 1;
10449
10450         btrfs_init_path(&path);
10451         key.objectid = bytenr;
10452         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10453                 key.type = BTRFS_METADATA_ITEM_KEY;
10454         else
10455                 key.type = BTRFS_EXTENT_ITEM_KEY;
10456         key.offset = (u64)-1;
10457
10458         /* Search for the backref in extent tree */
10459         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10460         if (ret < 0) {
10461                 err |= BACKREF_MISSING;
10462                 goto out;
10463         }
10464         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10465         if (ret) {
10466                 err |= BACKREF_MISSING;
10467                 goto out;
10468         }
10469
10470         leaf = path.nodes[0];
10471         slot = path.slots[0];
10472         btrfs_item_key_to_cpu(leaf, &key, slot);
10473
10474         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10475
10476         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10477                 skinny_level = (int)key.offset;
10478                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10479         } else {
10480                 struct btrfs_tree_block_info *info;
10481
10482                 info = (struct btrfs_tree_block_info *)(ei + 1);
10483                 skinny_level = btrfs_tree_block_level(leaf, info);
10484                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10485         }
10486
10487         if (eb) {
10488                 u64 header_gen;
10489                 u64 extent_gen;
10490
10491                 if (!(btrfs_extent_flags(leaf, ei) &
10492                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10493                         error(
10494                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10495                                 key.objectid, nodesize,
10496                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10497                         err = BACKREF_MISMATCH;
10498                 }
10499                 header_gen = btrfs_header_generation(eb);
10500                 extent_gen = btrfs_extent_generation(leaf, ei);
10501                 if (header_gen != extent_gen) {
10502                         error(
10503         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10504                                 key.objectid, nodesize, header_gen,
10505                                 extent_gen);
10506                         err = BACKREF_MISMATCH;
10507                 }
10508                 if (level != skinny_level) {
10509                         error(
10510                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10511                                 key.objectid, nodesize, level, skinny_level);
10512                         err = BACKREF_MISMATCH;
10513                 }
10514                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10515                         error(
10516                         "extent[%llu %u] is referred by other roots than %llu",
10517                                 key.objectid, nodesize, root->objectid);
10518                         err = BACKREF_MISMATCH;
10519                 }
10520         }
10521
10522         /*
10523          * Iterate the extent/metadata item to find the exact backref
10524          */
10525         item_size = btrfs_item_size_nr(leaf, slot);
10526         ptr = (unsigned long)iref;
10527         end = (unsigned long)ei + item_size;
10528         while (ptr < end) {
10529                 iref = (struct btrfs_extent_inline_ref *)ptr;
10530                 type = btrfs_extent_inline_ref_type(leaf, iref);
10531                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10532
10533                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10534                         (offset == root->objectid || offset == owner)) {
10535                         found_ref = 1;
10536                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10537                         /*
10538                          * Backref of tree reloc root points to itself, no need
10539                          * to check backref any more.
10540                          */
10541                         if (tree_reloc_root)
10542                                 found_ref = 1;
10543                         else
10544                         /* Check if the backref points to valid referencer */
10545                                 found_ref = !check_tree_block_ref(root, NULL,
10546                                                 offset, level + 1, owner);
10547                 }
10548
10549                 if (found_ref)
10550                         break;
10551                 ptr += btrfs_extent_inline_ref_size(type);
10552         }
10553
10554         /*
10555          * Inlined extent item doesn't have what we need, check
10556          * TREE_BLOCK_REF_KEY
10557          */
10558         if (!found_ref) {
10559                 btrfs_release_path(&path);
10560                 key.objectid = bytenr;
10561                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10562                 key.offset = root->objectid;
10563
10564                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10565                 if (!ret)
10566                         found_ref = 1;
10567         }
10568         if (!found_ref)
10569                 err |= BACKREF_MISSING;
10570 out:
10571         btrfs_release_path(&path);
10572         if (eb && (err & BACKREF_MISSING))
10573                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10574                         bytenr, nodesize, owner, level);
10575         return err;
10576 }
10577
10578 /*
10579  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10580  *
10581  * Return >0 any error found and output error message
10582  * Return 0 for no error found
10583  */
10584 static int check_extent_data_item(struct btrfs_root *root,
10585                                   struct extent_buffer *eb, int slot)
10586 {
10587         struct btrfs_file_extent_item *fi;
10588         struct btrfs_path path;
10589         struct btrfs_root *extent_root = root->fs_info->extent_root;
10590         struct btrfs_key fi_key;
10591         struct btrfs_key dbref_key;
10592         struct extent_buffer *leaf;
10593         struct btrfs_extent_item *ei;
10594         struct btrfs_extent_inline_ref *iref;
10595         struct btrfs_extent_data_ref *dref;
10596         u64 owner;
10597         u64 disk_bytenr;
10598         u64 disk_num_bytes;
10599         u64 extent_num_bytes;
10600         u64 extent_flags;
10601         u32 item_size;
10602         unsigned long end;
10603         unsigned long ptr;
10604         int type;
10605         u64 ref_root;
10606         int found_dbackref = 0;
10607         int err = 0;
10608         int ret;
10609
10610         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10611         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10612
10613         /* Nothing to check for hole and inline data extents */
10614         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10615             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10616                 return 0;
10617
10618         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10619         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10620         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10621
10622         /* Check unaligned disk_num_bytes and num_bytes */
10623         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10624                 error(
10625 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10626                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10627                         root->fs_info->sectorsize);
10628                 err |= BYTES_UNALIGNED;
10629         } else {
10630                 data_bytes_allocated += disk_num_bytes;
10631         }
10632         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10633                 error(
10634 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10635                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10636                         root->fs_info->sectorsize);
10637                 err |= BYTES_UNALIGNED;
10638         } else {
10639                 data_bytes_referenced += extent_num_bytes;
10640         }
10641         owner = btrfs_header_owner(eb);
10642
10643         /* Check the extent item of the file extent in extent tree */
10644         btrfs_init_path(&path);
10645         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10646         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10647         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10648
10649         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10650         if (ret)
10651                 goto out;
10652
10653         leaf = path.nodes[0];
10654         slot = path.slots[0];
10655         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10656
10657         extent_flags = btrfs_extent_flags(leaf, ei);
10658
10659         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10660                 error(
10661                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10662                     disk_bytenr, disk_num_bytes,
10663                     BTRFS_EXTENT_FLAG_DATA);
10664                 err |= BACKREF_MISMATCH;
10665         }
10666
10667         /* Check data backref inside that extent item */
10668         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10669         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10670         ptr = (unsigned long)iref;
10671         end = (unsigned long)ei + item_size;
10672         while (ptr < end) {
10673                 iref = (struct btrfs_extent_inline_ref *)ptr;
10674                 type = btrfs_extent_inline_ref_type(leaf, iref);
10675                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10676
10677                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10678                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10679                         if (ref_root == owner || ref_root == root->objectid)
10680                                 found_dbackref = 1;
10681                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10682                         found_dbackref = !check_tree_block_ref(root, NULL,
10683                                 btrfs_extent_inline_ref_offset(leaf, iref),
10684                                 0, owner);
10685                 }
10686
10687                 if (found_dbackref)
10688                         break;
10689                 ptr += btrfs_extent_inline_ref_size(type);
10690         }
10691
10692         if (!found_dbackref) {
10693                 btrfs_release_path(&path);
10694
10695                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10696                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10697                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10698                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10699                                 fi_key.objectid, fi_key.offset);
10700
10701                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10702                                         &dbref_key, &path, 0, 0);
10703                 if (!ret) {
10704                         found_dbackref = 1;
10705                         goto out;
10706                 }
10707
10708                 btrfs_release_path(&path);
10709
10710                 /*
10711                  * Neither inlined nor EXTENT_DATA_REF found, try
10712                  * SHARED_DATA_REF as last chance.
10713                  */
10714                 dbref_key.objectid = disk_bytenr;
10715                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10716                 dbref_key.offset = eb->start;
10717
10718                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10719                                         &dbref_key, &path, 0, 0);
10720                 if (!ret) {
10721                         found_dbackref = 1;
10722                         goto out;
10723                 }
10724         }
10725
10726 out:
10727         if (!found_dbackref)
10728                 err |= BACKREF_MISSING;
10729         btrfs_release_path(&path);
10730         if (err & BACKREF_MISSING) {
10731                 error("data extent[%llu %llu] backref lost",
10732                       disk_bytenr, disk_num_bytes);
10733         }
10734         return err;
10735 }
10736
10737 /*
10738  * Get real tree block level for the case like shared block
10739  * Return >= 0 as tree level
10740  * Return <0 for error
10741  */
10742 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10743 {
10744         struct extent_buffer *eb;
10745         struct btrfs_path path;
10746         struct btrfs_key key;
10747         struct btrfs_extent_item *ei;
10748         u64 flags;
10749         u64 transid;
10750         u8 backref_level;
10751         u8 header_level;
10752         int ret;
10753
10754         /* Search extent tree for extent generation and level */
10755         key.objectid = bytenr;
10756         key.type = BTRFS_METADATA_ITEM_KEY;
10757         key.offset = (u64)-1;
10758
10759         btrfs_init_path(&path);
10760         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10761         if (ret < 0)
10762                 goto release_out;
10763         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10764         if (ret < 0)
10765                 goto release_out;
10766         if (ret > 0) {
10767                 ret = -ENOENT;
10768                 goto release_out;
10769         }
10770
10771         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10772         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10773                             struct btrfs_extent_item);
10774         flags = btrfs_extent_flags(path.nodes[0], ei);
10775         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10776                 ret = -ENOENT;
10777                 goto release_out;
10778         }
10779
10780         /* Get transid for later read_tree_block() check */
10781         transid = btrfs_extent_generation(path.nodes[0], ei);
10782
10783         /* Get backref level as one source */
10784         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10785                 backref_level = key.offset;
10786         } else {
10787                 struct btrfs_tree_block_info *info;
10788
10789                 info = (struct btrfs_tree_block_info *)(ei + 1);
10790                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10791         }
10792         btrfs_release_path(&path);
10793
10794         /* Get level from tree block as an alternative source */
10795         eb = read_tree_block(fs_info, bytenr, transid);
10796         if (!extent_buffer_uptodate(eb)) {
10797                 free_extent_buffer(eb);
10798                 return -EIO;
10799         }
10800         header_level = btrfs_header_level(eb);
10801         free_extent_buffer(eb);
10802
10803         if (header_level != backref_level)
10804                 return -EIO;
10805         return header_level;
10806
10807 release_out:
10808         btrfs_release_path(&path);
10809         return ret;
10810 }
10811
10812 /*
10813  * Check if a tree block backref is valid (points to a valid tree block)
10814  * if level == -1, level will be resolved
10815  * Return >0 for any error found and print error message
10816  */
10817 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10818                                     u64 bytenr, int level)
10819 {
10820         struct btrfs_root *root;
10821         struct btrfs_key key;
10822         struct btrfs_path path;
10823         struct extent_buffer *eb;
10824         struct extent_buffer *node;
10825         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10826         int err = 0;
10827         int ret;
10828
10829         /* Query level for level == -1 special case */
10830         if (level == -1)
10831                 level = query_tree_block_level(fs_info, bytenr);
10832         if (level < 0) {
10833                 err |= REFERENCER_MISSING;
10834                 goto out;
10835         }
10836
10837         key.objectid = root_id;
10838         key.type = BTRFS_ROOT_ITEM_KEY;
10839         key.offset = (u64)-1;
10840
10841         root = btrfs_read_fs_root(fs_info, &key);
10842         if (IS_ERR(root)) {
10843                 err |= REFERENCER_MISSING;
10844                 goto out;
10845         }
10846
10847         /* Read out the tree block to get item/node key */
10848         eb = read_tree_block(fs_info, bytenr, 0);
10849         if (!extent_buffer_uptodate(eb)) {
10850                 err |= REFERENCER_MISSING;
10851                 free_extent_buffer(eb);
10852                 goto out;
10853         }
10854
10855         /* Empty tree, no need to check key */
10856         if (!btrfs_header_nritems(eb) && !level) {
10857                 free_extent_buffer(eb);
10858                 goto out;
10859         }
10860
10861         if (level)
10862                 btrfs_node_key_to_cpu(eb, &key, 0);
10863         else
10864                 btrfs_item_key_to_cpu(eb, &key, 0);
10865
10866         free_extent_buffer(eb);
10867
10868         btrfs_init_path(&path);
10869         path.lowest_level = level;
10870         /* Search with the first key, to ensure we can reach it */
10871         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10872         if (ret < 0) {
10873                 err |= REFERENCER_MISSING;
10874                 goto release_out;
10875         }
10876
10877         node = path.nodes[level];
10878         if (btrfs_header_bytenr(node) != bytenr) {
10879                 error(
10880         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10881                         bytenr, nodesize, bytenr,
10882                         btrfs_header_bytenr(node));
10883                 err |= REFERENCER_MISMATCH;
10884         }
10885         if (btrfs_header_level(node) != level) {
10886                 error(
10887         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10888                         bytenr, nodesize, level,
10889                         btrfs_header_level(node));
10890                 err |= REFERENCER_MISMATCH;
10891         }
10892
10893 release_out:
10894         btrfs_release_path(&path);
10895 out:
10896         if (err & REFERENCER_MISSING) {
10897                 if (level < 0)
10898                         error("extent [%llu %d] lost referencer (owner: %llu)",
10899                                 bytenr, nodesize, root_id);
10900                 else
10901                         error(
10902                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10903                                 bytenr, nodesize, root_id, level);
10904         }
10905
10906         return err;
10907 }
10908
10909 /*
10910  * Check if tree block @eb is tree reloc root.
10911  * Return 0 if it's not or any problem happens
10912  * Return 1 if it's a tree reloc root
10913  */
10914 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10915                                  struct extent_buffer *eb)
10916 {
10917         struct btrfs_root *tree_reloc_root;
10918         struct btrfs_key key;
10919         u64 bytenr = btrfs_header_bytenr(eb);
10920         u64 owner = btrfs_header_owner(eb);
10921         int ret = 0;
10922
10923         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10924         key.offset = owner;
10925         key.type = BTRFS_ROOT_ITEM_KEY;
10926
10927         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10928         if (IS_ERR(tree_reloc_root))
10929                 return 0;
10930
10931         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10932                 ret = 1;
10933         btrfs_free_fs_root(tree_reloc_root);
10934         return ret;
10935 }
10936
10937 /*
10938  * Check referencer for shared block backref
10939  * If level == -1, this function will resolve the level.
10940  */
10941 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10942                                      u64 parent, u64 bytenr, int level)
10943 {
10944         struct extent_buffer *eb;
10945         u32 nr;
10946         int found_parent = 0;
10947         int i;
10948
10949         eb = read_tree_block(fs_info, parent, 0);
10950         if (!extent_buffer_uptodate(eb))
10951                 goto out;
10952
10953         if (level == -1)
10954                 level = query_tree_block_level(fs_info, bytenr);
10955         if (level < 0)
10956                 goto out;
10957
10958         /* It's possible it's a tree reloc root */
10959         if (parent == bytenr) {
10960                 if (is_tree_reloc_root(fs_info, eb))
10961                         found_parent = 1;
10962                 goto out;
10963         }
10964
10965         if (level + 1 != btrfs_header_level(eb))
10966                 goto out;
10967
10968         nr = btrfs_header_nritems(eb);
10969         for (i = 0; i < nr; i++) {
10970                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10971                         found_parent = 1;
10972                         break;
10973                 }
10974         }
10975 out:
10976         free_extent_buffer(eb);
10977         if (!found_parent) {
10978                 error(
10979         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10980                         bytenr, fs_info->nodesize, parent, level);
10981                 return REFERENCER_MISSING;
10982         }
10983         return 0;
10984 }
10985
10986 /*
10987  * Check referencer for normal (inlined) data ref
10988  * If len == 0, it will be resolved by searching in extent tree
10989  */
10990 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10991                                      u64 root_id, u64 objectid, u64 offset,
10992                                      u64 bytenr, u64 len, u32 count)
10993 {
10994         struct btrfs_root *root;
10995         struct btrfs_root *extent_root = fs_info->extent_root;
10996         struct btrfs_key key;
10997         struct btrfs_path path;
10998         struct extent_buffer *leaf;
10999         struct btrfs_file_extent_item *fi;
11000         u32 found_count = 0;
11001         int slot;
11002         int ret = 0;
11003
11004         if (!len) {
11005                 key.objectid = bytenr;
11006                 key.type = BTRFS_EXTENT_ITEM_KEY;
11007                 key.offset = (u64)-1;
11008
11009                 btrfs_init_path(&path);
11010                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11011                 if (ret < 0)
11012                         goto out;
11013                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11014                 if (ret)
11015                         goto out;
11016                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11017                 if (key.objectid != bytenr ||
11018                     key.type != BTRFS_EXTENT_ITEM_KEY)
11019                         goto out;
11020                 len = key.offset;
11021                 btrfs_release_path(&path);
11022         }
11023         key.objectid = root_id;
11024         key.type = BTRFS_ROOT_ITEM_KEY;
11025         key.offset = (u64)-1;
11026         btrfs_init_path(&path);
11027
11028         root = btrfs_read_fs_root(fs_info, &key);
11029         if (IS_ERR(root))
11030                 goto out;
11031
11032         key.objectid = objectid;
11033         key.type = BTRFS_EXTENT_DATA_KEY;
11034         /*
11035          * It can be nasty as data backref offset is
11036          * file offset - file extent offset, which is smaller or
11037          * equal to original backref offset.  The only special case is
11038          * overflow.  So we need to special check and do further search.
11039          */
11040         key.offset = offset & (1ULL << 63) ? 0 : offset;
11041
11042         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11043         if (ret < 0)
11044                 goto out;
11045
11046         /*
11047          * Search afterwards to get correct one
11048          * NOTE: As we must do a comprehensive check on the data backref to
11049          * make sure the dref count also matches, we must iterate all file
11050          * extents for that inode.
11051          */
11052         while (1) {
11053                 leaf = path.nodes[0];
11054                 slot = path.slots[0];
11055
11056                 if (slot >= btrfs_header_nritems(leaf))
11057                         goto next;
11058                 btrfs_item_key_to_cpu(leaf, &key, slot);
11059                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11060                         break;
11061                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11062                 /*
11063                  * Except normal disk bytenr and disk num bytes, we still
11064                  * need to do extra check on dbackref offset as
11065                  * dbackref offset = file_offset - file_extent_offset
11066                  */
11067                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11068                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11069                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11070                     offset)
11071                         found_count++;
11072
11073 next:
11074                 ret = btrfs_next_item(root, &path);
11075                 if (ret)
11076                         break;
11077         }
11078 out:
11079         btrfs_release_path(&path);
11080         if (found_count != count) {
11081                 error(
11082 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11083                         bytenr, len, root_id, objectid, offset, count, found_count);
11084                 return REFERENCER_MISSING;
11085         }
11086         return 0;
11087 }
11088
11089 /*
11090  * Check if the referencer of a shared data backref exists
11091  */
11092 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11093                                      u64 parent, u64 bytenr)
11094 {
11095         struct extent_buffer *eb;
11096         struct btrfs_key key;
11097         struct btrfs_file_extent_item *fi;
11098         u32 nr;
11099         int found_parent = 0;
11100         int i;
11101
11102         eb = read_tree_block(fs_info, parent, 0);
11103         if (!extent_buffer_uptodate(eb))
11104                 goto out;
11105
11106         nr = btrfs_header_nritems(eb);
11107         for (i = 0; i < nr; i++) {
11108                 btrfs_item_key_to_cpu(eb, &key, i);
11109                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11110                         continue;
11111
11112                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11113                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11114                         continue;
11115
11116                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11117                         found_parent = 1;
11118                         break;
11119                 }
11120         }
11121
11122 out:
11123         free_extent_buffer(eb);
11124         if (!found_parent) {
11125                 error("shared extent %llu referencer lost (parent: %llu)",
11126                         bytenr, parent);
11127                 return REFERENCER_MISSING;
11128         }
11129         return 0;
11130 }
11131
11132 /*
11133  * This function will check a given extent item, including its backref and
11134  * itself (like crossing stripe boundary and type)
11135  *
11136  * Since we don't use extent_record anymore, introduce new error bit
11137  */
11138 static int check_extent_item(struct btrfs_fs_info *fs_info,
11139                              struct extent_buffer *eb, int slot)
11140 {
11141         struct btrfs_extent_item *ei;
11142         struct btrfs_extent_inline_ref *iref;
11143         struct btrfs_extent_data_ref *dref;
11144         unsigned long end;
11145         unsigned long ptr;
11146         int type;
11147         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11148         u32 item_size = btrfs_item_size_nr(eb, slot);
11149         u64 flags;
11150         u64 offset;
11151         int metadata = 0;
11152         int level;
11153         struct btrfs_key key;
11154         int ret;
11155         int err = 0;
11156
11157         btrfs_item_key_to_cpu(eb, &key, slot);
11158         if (key.type == BTRFS_EXTENT_ITEM_KEY)
11159                 bytes_used += key.offset;
11160         else
11161                 bytes_used += nodesize;
11162
11163         if (item_size < sizeof(*ei)) {
11164                 /*
11165                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11166                  * old thing when on disk format is still un-determined.
11167                  * No need to care about it anymore
11168                  */
11169                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11170                 return -ENOTTY;
11171         }
11172
11173         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11174         flags = btrfs_extent_flags(eb, ei);
11175
11176         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11177                 metadata = 1;
11178         if (metadata && check_crossing_stripes(global_info, key.objectid,
11179                                                eb->len)) {
11180                 error("bad metadata [%llu, %llu) crossing stripe boundary",
11181                       key.objectid, key.objectid + nodesize);
11182                 err |= CROSSING_STRIPE_BOUNDARY;
11183         }
11184
11185         ptr = (unsigned long)(ei + 1);
11186
11187         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11188                 /* Old EXTENT_ITEM metadata */
11189                 struct btrfs_tree_block_info *info;
11190
11191                 info = (struct btrfs_tree_block_info *)ptr;
11192                 level = btrfs_tree_block_level(eb, info);
11193                 ptr += sizeof(struct btrfs_tree_block_info);
11194         } else {
11195                 /* New METADATA_ITEM */
11196                 level = key.offset;
11197         }
11198         end = (unsigned long)ei + item_size;
11199
11200 next:
11201         /* Reached extent item end normally */
11202         if (ptr == end)
11203                 goto out;
11204
11205         /* Beyond extent item end, wrong item size */
11206         if (ptr > end) {
11207                 err |= ITEM_SIZE_MISMATCH;
11208                 error("extent item at bytenr %llu slot %d has wrong size",
11209                         eb->start, slot);
11210                 goto out;
11211         }
11212
11213         /* Now check every backref in this extent item */
11214         iref = (struct btrfs_extent_inline_ref *)ptr;
11215         type = btrfs_extent_inline_ref_type(eb, iref);
11216         offset = btrfs_extent_inline_ref_offset(eb, iref);
11217         switch (type) {
11218         case BTRFS_TREE_BLOCK_REF_KEY:
11219                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11220                                                level);
11221                 err |= ret;
11222                 break;
11223         case BTRFS_SHARED_BLOCK_REF_KEY:
11224                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11225                                                  level);
11226                 err |= ret;
11227                 break;
11228         case BTRFS_EXTENT_DATA_REF_KEY:
11229                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11230                 ret = check_extent_data_backref(fs_info,
11231                                 btrfs_extent_data_ref_root(eb, dref),
11232                                 btrfs_extent_data_ref_objectid(eb, dref),
11233                                 btrfs_extent_data_ref_offset(eb, dref),
11234                                 key.objectid, key.offset,
11235                                 btrfs_extent_data_ref_count(eb, dref));
11236                 err |= ret;
11237                 break;
11238         case BTRFS_SHARED_DATA_REF_KEY:
11239                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11240                 err |= ret;
11241                 break;
11242         default:
11243                 error("extent[%llu %d %llu] has unknown ref type: %d",
11244                         key.objectid, key.type, key.offset, type);
11245                 err |= UNKNOWN_TYPE;
11246                 goto out;
11247         }
11248
11249         ptr += btrfs_extent_inline_ref_size(type);
11250         goto next;
11251
11252 out:
11253         return err;
11254 }
11255
11256 /*
11257  * Check if a dev extent item is referred correctly by its chunk
11258  */
11259 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11260                                  struct extent_buffer *eb, int slot)
11261 {
11262         struct btrfs_root *chunk_root = fs_info->chunk_root;
11263         struct btrfs_dev_extent *ptr;
11264         struct btrfs_path path;
11265         struct btrfs_key chunk_key;
11266         struct btrfs_key devext_key;
11267         struct btrfs_chunk *chunk;
11268         struct extent_buffer *l;
11269         int num_stripes;
11270         u64 length;
11271         int i;
11272         int found_chunk = 0;
11273         int ret;
11274
11275         btrfs_item_key_to_cpu(eb, &devext_key, slot);
11276         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11277         length = btrfs_dev_extent_length(eb, ptr);
11278
11279         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11280         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11281         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11282
11283         btrfs_init_path(&path);
11284         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11285         if (ret)
11286                 goto out;
11287
11288         l = path.nodes[0];
11289         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11290         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11291                                       chunk_key.offset);
11292         if (ret < 0)
11293                 goto out;
11294
11295         if (btrfs_stripe_length(fs_info, l, chunk) != length)
11296                 goto out;
11297
11298         num_stripes = btrfs_chunk_num_stripes(l, chunk);
11299         for (i = 0; i < num_stripes; i++) {
11300                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11301                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11302
11303                 if (devid == devext_key.objectid &&
11304                     offset == devext_key.offset) {
11305                         found_chunk = 1;
11306                         break;
11307                 }
11308         }
11309 out:
11310         btrfs_release_path(&path);
11311         if (!found_chunk) {
11312                 error(
11313                 "device extent[%llu, %llu, %llu] did not find the related chunk",
11314                         devext_key.objectid, devext_key.offset, length);
11315                 return REFERENCER_MISSING;
11316         }
11317         return 0;
11318 }
11319
11320 /*
11321  * Check if the used space is correct with the dev item
11322  */
11323 static int check_dev_item(struct btrfs_fs_info *fs_info,
11324                           struct extent_buffer *eb, int slot)
11325 {
11326         struct btrfs_root *dev_root = fs_info->dev_root;
11327         struct btrfs_dev_item *dev_item;
11328         struct btrfs_path path;
11329         struct btrfs_key key;
11330         struct btrfs_dev_extent *ptr;
11331         u64 dev_id;
11332         u64 used;
11333         u64 total = 0;
11334         int ret;
11335
11336         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11337         dev_id = btrfs_device_id(eb, dev_item);
11338         used = btrfs_device_bytes_used(eb, dev_item);
11339
11340         key.objectid = dev_id;
11341         key.type = BTRFS_DEV_EXTENT_KEY;
11342         key.offset = 0;
11343
11344         btrfs_init_path(&path);
11345         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11346         if (ret < 0) {
11347                 btrfs_item_key_to_cpu(eb, &key, slot);
11348                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11349                         key.objectid, key.type, key.offset);
11350                 btrfs_release_path(&path);
11351                 return REFERENCER_MISSING;
11352         }
11353
11354         /* Iterate dev_extents to calculate the used space of a device */
11355         while (1) {
11356                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11357                         goto next;
11358
11359                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11360                 if (key.objectid > dev_id)
11361                         break;
11362                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11363                         goto next;
11364
11365                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11366                                      struct btrfs_dev_extent);
11367                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11368 next:
11369                 ret = btrfs_next_item(dev_root, &path);
11370                 if (ret)
11371                         break;
11372         }
11373         btrfs_release_path(&path);
11374
11375         if (used != total) {
11376                 btrfs_item_key_to_cpu(eb, &key, slot);
11377                 error(
11378 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11379                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11380                         BTRFS_DEV_EXTENT_KEY, dev_id);
11381                 return ACCOUNTING_MISMATCH;
11382         }
11383         return 0;
11384 }
11385
11386 /*
11387  * Check a block group item with its referener (chunk) and its used space
11388  * with extent/metadata item
11389  */
11390 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11391                                   struct extent_buffer *eb, int slot)
11392 {
11393         struct btrfs_root *extent_root = fs_info->extent_root;
11394         struct btrfs_root *chunk_root = fs_info->chunk_root;
11395         struct btrfs_block_group_item *bi;
11396         struct btrfs_block_group_item bg_item;
11397         struct btrfs_path path;
11398         struct btrfs_key bg_key;
11399         struct btrfs_key chunk_key;
11400         struct btrfs_key extent_key;
11401         struct btrfs_chunk *chunk;
11402         struct extent_buffer *leaf;
11403         struct btrfs_extent_item *ei;
11404         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11405         u64 flags;
11406         u64 bg_flags;
11407         u64 used;
11408         u64 total = 0;
11409         int ret;
11410         int err = 0;
11411
11412         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11413         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11414         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11415         used = btrfs_block_group_used(&bg_item);
11416         bg_flags = btrfs_block_group_flags(&bg_item);
11417
11418         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11419         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11420         chunk_key.offset = bg_key.objectid;
11421
11422         btrfs_init_path(&path);
11423         /* Search for the referencer chunk */
11424         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11425         if (ret) {
11426                 error(
11427                 "block group[%llu %llu] did not find the related chunk item",
11428                         bg_key.objectid, bg_key.offset);
11429                 err |= REFERENCER_MISSING;
11430         } else {
11431                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11432                                         struct btrfs_chunk);
11433                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11434                                                 bg_key.offset) {
11435                         error(
11436         "block group[%llu %llu] related chunk item length does not match",
11437                                 bg_key.objectid, bg_key.offset);
11438                         err |= REFERENCER_MISMATCH;
11439                 }
11440         }
11441         btrfs_release_path(&path);
11442
11443         /* Search from the block group bytenr */
11444         extent_key.objectid = bg_key.objectid;
11445         extent_key.type = 0;
11446         extent_key.offset = 0;
11447
11448         btrfs_init_path(&path);
11449         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11450         if (ret < 0)
11451                 goto out;
11452
11453         /* Iterate extent tree to account used space */
11454         while (1) {
11455                 leaf = path.nodes[0];
11456
11457                 /* Search slot can point to the last item beyond leaf nritems */
11458                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11459                         goto next;
11460
11461                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11462                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11463                         break;
11464
11465                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11466                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11467                         goto next;
11468                 if (extent_key.objectid < bg_key.objectid)
11469                         goto next;
11470
11471                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11472                         total += nodesize;
11473                 else
11474                         total += extent_key.offset;
11475
11476                 ei = btrfs_item_ptr(leaf, path.slots[0],
11477                                     struct btrfs_extent_item);
11478                 flags = btrfs_extent_flags(leaf, ei);
11479                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11480                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11481                                 error(
11482                         "bad extent[%llu, %llu) type mismatch with chunk",
11483                                         extent_key.objectid,
11484                                         extent_key.objectid + extent_key.offset);
11485                                 err |= CHUNK_TYPE_MISMATCH;
11486                         }
11487                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11488                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11489                                     BTRFS_BLOCK_GROUP_METADATA))) {
11490                                 error(
11491                         "bad extent[%llu, %llu) type mismatch with chunk",
11492                                         extent_key.objectid,
11493                                         extent_key.objectid + nodesize);
11494                                 err |= CHUNK_TYPE_MISMATCH;
11495                         }
11496                 }
11497 next:
11498                 ret = btrfs_next_item(extent_root, &path);
11499                 if (ret)
11500                         break;
11501         }
11502
11503 out:
11504         btrfs_release_path(&path);
11505
11506         if (total != used) {
11507                 error(
11508                 "block group[%llu %llu] used %llu but extent items used %llu",
11509                         bg_key.objectid, bg_key.offset, used, total);
11510                 err |= ACCOUNTING_MISMATCH;
11511         }
11512         return err;
11513 }
11514
11515 /*
11516  * Check a chunk item.
11517  * Including checking all referred dev_extents and block group
11518  */
11519 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11520                             struct extent_buffer *eb, int slot)
11521 {
11522         struct btrfs_root *extent_root = fs_info->extent_root;
11523         struct btrfs_root *dev_root = fs_info->dev_root;
11524         struct btrfs_path path;
11525         struct btrfs_key chunk_key;
11526         struct btrfs_key bg_key;
11527         struct btrfs_key devext_key;
11528         struct btrfs_chunk *chunk;
11529         struct extent_buffer *leaf;
11530         struct btrfs_block_group_item *bi;
11531         struct btrfs_block_group_item bg_item;
11532         struct btrfs_dev_extent *ptr;
11533         u64 length;
11534         u64 chunk_end;
11535         u64 stripe_len;
11536         u64 type;
11537         int num_stripes;
11538         u64 offset;
11539         u64 objectid;
11540         int i;
11541         int ret;
11542         int err = 0;
11543
11544         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11545         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11546         length = btrfs_chunk_length(eb, chunk);
11547         chunk_end = chunk_key.offset + length;
11548         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11549                                       chunk_key.offset);
11550         if (ret < 0) {
11551                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11552                         chunk_end);
11553                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11554                 goto out;
11555         }
11556         type = btrfs_chunk_type(eb, chunk);
11557
11558         bg_key.objectid = chunk_key.offset;
11559         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11560         bg_key.offset = length;
11561
11562         btrfs_init_path(&path);
11563         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11564         if (ret) {
11565                 error(
11566                 "chunk[%llu %llu) did not find the related block group item",
11567                         chunk_key.offset, chunk_end);
11568                 err |= REFERENCER_MISSING;
11569         } else{
11570                 leaf = path.nodes[0];
11571                 bi = btrfs_item_ptr(leaf, path.slots[0],
11572                                     struct btrfs_block_group_item);
11573                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11574                                    sizeof(bg_item));
11575                 if (btrfs_block_group_flags(&bg_item) != type) {
11576                         error(
11577 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11578                                 chunk_key.offset, chunk_end, type,
11579                                 btrfs_block_group_flags(&bg_item));
11580                         err |= REFERENCER_MISSING;
11581                 }
11582         }
11583
11584         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11585         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11586         for (i = 0; i < num_stripes; i++) {
11587                 btrfs_release_path(&path);
11588                 btrfs_init_path(&path);
11589                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11590                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11591                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11592
11593                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11594                                         0, 0);
11595                 if (ret)
11596                         goto not_match_dev;
11597
11598                 leaf = path.nodes[0];
11599                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11600                                      struct btrfs_dev_extent);
11601                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11602                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11603                 if (objectid != chunk_key.objectid ||
11604                     offset != chunk_key.offset ||
11605                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11606                         goto not_match_dev;
11607                 continue;
11608 not_match_dev:
11609                 err |= BACKREF_MISSING;
11610                 error(
11611                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11612                         chunk_key.objectid, chunk_end, i);
11613                 continue;
11614         }
11615         btrfs_release_path(&path);
11616 out:
11617         return err;
11618 }
11619
11620 /*
11621  * Main entry function to check known items and update related accounting info
11622  */
11623 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11624 {
11625         struct btrfs_fs_info *fs_info = root->fs_info;
11626         struct btrfs_key key;
11627         int slot = 0;
11628         int type;
11629         struct btrfs_extent_data_ref *dref;
11630         int ret;
11631         int err = 0;
11632
11633 next:
11634         btrfs_item_key_to_cpu(eb, &key, slot);
11635         type = key.type;
11636
11637         switch (type) {
11638         case BTRFS_EXTENT_DATA_KEY:
11639                 ret = check_extent_data_item(root, eb, slot);
11640                 err |= ret;
11641                 break;
11642         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11643                 ret = check_block_group_item(fs_info, eb, slot);
11644                 err |= ret;
11645                 break;
11646         case BTRFS_DEV_ITEM_KEY:
11647                 ret = check_dev_item(fs_info, eb, slot);
11648                 err |= ret;
11649                 break;
11650         case BTRFS_CHUNK_ITEM_KEY:
11651                 ret = check_chunk_item(fs_info, eb, slot);
11652                 err |= ret;
11653                 break;
11654         case BTRFS_DEV_EXTENT_KEY:
11655                 ret = check_dev_extent_item(fs_info, eb, slot);
11656                 err |= ret;
11657                 break;
11658         case BTRFS_EXTENT_ITEM_KEY:
11659         case BTRFS_METADATA_ITEM_KEY:
11660                 ret = check_extent_item(fs_info, eb, slot);
11661                 err |= ret;
11662                 break;
11663         case BTRFS_EXTENT_CSUM_KEY:
11664                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11665                 break;
11666         case BTRFS_TREE_BLOCK_REF_KEY:
11667                 ret = check_tree_block_backref(fs_info, key.offset,
11668                                                key.objectid, -1);
11669                 err |= ret;
11670                 break;
11671         case BTRFS_EXTENT_DATA_REF_KEY:
11672                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11673                 ret = check_extent_data_backref(fs_info,
11674                                 btrfs_extent_data_ref_root(eb, dref),
11675                                 btrfs_extent_data_ref_objectid(eb, dref),
11676                                 btrfs_extent_data_ref_offset(eb, dref),
11677                                 key.objectid, 0,
11678                                 btrfs_extent_data_ref_count(eb, dref));
11679                 err |= ret;
11680                 break;
11681         case BTRFS_SHARED_BLOCK_REF_KEY:
11682                 ret = check_shared_block_backref(fs_info, key.offset,
11683                                                  key.objectid, -1);
11684                 err |= ret;
11685                 break;
11686         case BTRFS_SHARED_DATA_REF_KEY:
11687                 ret = check_shared_data_backref(fs_info, key.offset,
11688                                                 key.objectid);
11689                 err |= ret;
11690                 break;
11691         default:
11692                 break;
11693         }
11694
11695         if (++slot < btrfs_header_nritems(eb))
11696                 goto next;
11697
11698         return err;
11699 }
11700
11701 /*
11702  * Helper function for later fs/subvol tree check.  To determine if a tree
11703  * block should be checked.
11704  * This function will ensure only the direct referencer with lowest rootid to
11705  * check a fs/subvolume tree block.
11706  *
11707  * Backref check at extent tree would detect errors like missing subvolume
11708  * tree, so we can do aggressive check to reduce duplicated checks.
11709  */
11710 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11711 {
11712         struct btrfs_root *extent_root = root->fs_info->extent_root;
11713         struct btrfs_key key;
11714         struct btrfs_path path;
11715         struct extent_buffer *leaf;
11716         int slot;
11717         struct btrfs_extent_item *ei;
11718         unsigned long ptr;
11719         unsigned long end;
11720         int type;
11721         u32 item_size;
11722         u64 offset;
11723         struct btrfs_extent_inline_ref *iref;
11724         int ret;
11725
11726         btrfs_init_path(&path);
11727         key.objectid = btrfs_header_bytenr(eb);
11728         key.type = BTRFS_METADATA_ITEM_KEY;
11729         key.offset = (u64)-1;
11730
11731         /*
11732          * Any failure in backref resolving means we can't determine
11733          * whom the tree block belongs to.
11734          * So in that case, we need to check that tree block
11735          */
11736         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11737         if (ret < 0)
11738                 goto need_check;
11739
11740         ret = btrfs_previous_extent_item(extent_root, &path,
11741                                          btrfs_header_bytenr(eb));
11742         if (ret)
11743                 goto need_check;
11744
11745         leaf = path.nodes[0];
11746         slot = path.slots[0];
11747         btrfs_item_key_to_cpu(leaf, &key, slot);
11748         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11749
11750         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11751                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11752         } else {
11753                 struct btrfs_tree_block_info *info;
11754
11755                 info = (struct btrfs_tree_block_info *)(ei + 1);
11756                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11757         }
11758
11759         item_size = btrfs_item_size_nr(leaf, slot);
11760         ptr = (unsigned long)iref;
11761         end = (unsigned long)ei + item_size;
11762         while (ptr < end) {
11763                 iref = (struct btrfs_extent_inline_ref *)ptr;
11764                 type = btrfs_extent_inline_ref_type(leaf, iref);
11765                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11766
11767                 /*
11768                  * We only check the tree block if current root is
11769                  * the lowest referencer of it.
11770                  */
11771                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11772                     offset < root->objectid) {
11773                         btrfs_release_path(&path);
11774                         return 0;
11775                 }
11776
11777                 ptr += btrfs_extent_inline_ref_size(type);
11778         }
11779         /*
11780          * Normally we should also check keyed tree block ref, but that may be
11781          * very time consuming.  Inlined ref should already make us skip a lot
11782          * of refs now.  So skip search keyed tree block ref.
11783          */
11784
11785 need_check:
11786         btrfs_release_path(&path);
11787         return 1;
11788 }
11789
11790 /*
11791  * Traversal function for tree block. We will do:
11792  * 1) Skip shared fs/subvolume tree blocks
11793  * 2) Update related bytes accounting
11794  * 3) Pre-order traversal
11795  */
11796 static int traverse_tree_block(struct btrfs_root *root,
11797                                 struct extent_buffer *node)
11798 {
11799         struct extent_buffer *eb;
11800         struct btrfs_key key;
11801         struct btrfs_key drop_key;
11802         int level;
11803         u64 nr;
11804         int i;
11805         int err = 0;
11806         int ret;
11807
11808         /*
11809          * Skip shared fs/subvolume tree block, in that case they will
11810          * be checked by referencer with lowest rootid
11811          */
11812         if (is_fstree(root->objectid) && !should_check(root, node))
11813                 return 0;
11814
11815         /* Update bytes accounting */
11816         total_btree_bytes += node->len;
11817         if (fs_root_objectid(btrfs_header_owner(node)))
11818                 total_fs_tree_bytes += node->len;
11819         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11820                 total_extent_tree_bytes += node->len;
11821
11822         /* pre-order tranversal, check itself first */
11823         level = btrfs_header_level(node);
11824         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11825                                    btrfs_header_level(node),
11826                                    btrfs_header_owner(node));
11827         err |= ret;
11828         if (err)
11829                 error(
11830         "check %s failed root %llu bytenr %llu level %d, force continue check",
11831                         level ? "node":"leaf", root->objectid,
11832                         btrfs_header_bytenr(node), btrfs_header_level(node));
11833
11834         if (!level) {
11835                 btree_space_waste += btrfs_leaf_free_space(root, node);
11836                 ret = check_leaf_items(root, node);
11837                 err |= ret;
11838                 return err;
11839         }
11840
11841         nr = btrfs_header_nritems(node);
11842         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11843         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11844                 sizeof(struct btrfs_key_ptr);
11845
11846         /* Then check all its children */
11847         for (i = 0; i < nr; i++) {
11848                 u64 blocknr = btrfs_node_blockptr(node, i);
11849
11850                 btrfs_node_key_to_cpu(node, &key, i);
11851                 if (level == root->root_item.drop_level &&
11852                     is_dropped_key(&key, &drop_key))
11853                         continue;
11854
11855                 /*
11856                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11857                  * to call the function itself.
11858                  */
11859                 eb = read_tree_block(root->fs_info, blocknr, 0);
11860                 if (extent_buffer_uptodate(eb)) {
11861                         ret = traverse_tree_block(root, eb);
11862                         err |= ret;
11863                 }
11864                 free_extent_buffer(eb);
11865         }
11866
11867         return err;
11868 }
11869
11870 /*
11871  * Low memory usage version check_chunks_and_extents.
11872  */
11873 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11874 {
11875         struct btrfs_path path;
11876         struct btrfs_key key;
11877         struct btrfs_root *root1;
11878         struct btrfs_root *root;
11879         struct btrfs_root *cur_root;
11880         int err = 0;
11881         int ret;
11882
11883         root = fs_info->fs_root;
11884
11885         root1 = root->fs_info->chunk_root;
11886         ret = traverse_tree_block(root1, root1->node);
11887         err |= ret;
11888
11889         root1 = root->fs_info->tree_root;
11890         ret = traverse_tree_block(root1, root1->node);
11891         err |= ret;
11892
11893         btrfs_init_path(&path);
11894         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11895         key.offset = 0;
11896         key.type = BTRFS_ROOT_ITEM_KEY;
11897
11898         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11899         if (ret) {
11900                 error("cannot find extent treet in tree_root");
11901                 goto out;
11902         }
11903
11904         while (1) {
11905                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11906                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11907                         goto next;
11908                 key.offset = (u64)-1;
11909
11910                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11911                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11912                                         &key);
11913                 else
11914                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11915                 if (IS_ERR(cur_root) || !cur_root) {
11916                         error("failed to read tree: %lld", key.objectid);
11917                         goto next;
11918                 }
11919
11920                 ret = traverse_tree_block(cur_root, cur_root->node);
11921                 err |= ret;
11922
11923                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11924                         btrfs_free_fs_root(cur_root);
11925 next:
11926                 ret = btrfs_next_item(root1, &path);
11927                 if (ret)
11928                         goto out;
11929         }
11930
11931 out:
11932         btrfs_release_path(&path);
11933         return err;
11934 }
11935
11936 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11937 {
11938         int ret;
11939
11940         if (!ctx.progress_enabled)
11941                 fprintf(stderr, "checking extents\n");
11942         if (check_mode == CHECK_MODE_LOWMEM)
11943                 ret = check_chunks_and_extents_v2(fs_info);
11944         else
11945                 ret = check_chunks_and_extents(fs_info);
11946
11947         return ret;
11948 }
11949
11950 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11951                            struct btrfs_root *root, int overwrite)
11952 {
11953         struct extent_buffer *c;
11954         struct extent_buffer *old = root->node;
11955         int level;
11956         int ret;
11957         struct btrfs_disk_key disk_key = {0,0,0};
11958
11959         level = 0;
11960
11961         if (overwrite) {
11962                 c = old;
11963                 extent_buffer_get(c);
11964                 goto init;
11965         }
11966         c = btrfs_alloc_free_block(trans, root,
11967                                    root->fs_info->nodesize,
11968                                    root->root_key.objectid,
11969                                    &disk_key, level, 0, 0);
11970         if (IS_ERR(c)) {
11971                 c = old;
11972                 extent_buffer_get(c);
11973                 overwrite = 1;
11974         }
11975 init:
11976         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11977         btrfs_set_header_level(c, level);
11978         btrfs_set_header_bytenr(c, c->start);
11979         btrfs_set_header_generation(c, trans->transid);
11980         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11981         btrfs_set_header_owner(c, root->root_key.objectid);
11982
11983         write_extent_buffer(c, root->fs_info->fsid,
11984                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11985
11986         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11987                             btrfs_header_chunk_tree_uuid(c),
11988                             BTRFS_UUID_SIZE);
11989
11990         btrfs_mark_buffer_dirty(c);
11991         /*
11992          * this case can happen in the following case:
11993          *
11994          * 1.overwrite previous root.
11995          *
11996          * 2.reinit reloc data root, this is because we skip pin
11997          * down reloc data tree before which means we can allocate
11998          * same block bytenr here.
11999          */
12000         if (old->start == c->start) {
12001                 btrfs_set_root_generation(&root->root_item,
12002                                           trans->transid);
12003                 root->root_item.level = btrfs_header_level(root->node);
12004                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12005                                         &root->root_key, &root->root_item);
12006                 if (ret) {
12007                         free_extent_buffer(c);
12008                         return ret;
12009                 }
12010         }
12011         free_extent_buffer(old);
12012         root->node = c;
12013         add_root_to_dirty_list(root);
12014         return 0;
12015 }
12016
12017 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12018                                 struct extent_buffer *eb, int tree_root)
12019 {
12020         struct extent_buffer *tmp;
12021         struct btrfs_root_item *ri;
12022         struct btrfs_key key;
12023         u64 bytenr;
12024         int level = btrfs_header_level(eb);
12025         int nritems;
12026         int ret;
12027         int i;
12028
12029         /*
12030          * If we have pinned this block before, don't pin it again.
12031          * This can not only avoid forever loop with broken filesystem
12032          * but also give us some speedups.
12033          */
12034         if (test_range_bit(&fs_info->pinned_extents, eb->start,
12035                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12036                 return 0;
12037
12038         btrfs_pin_extent(fs_info, eb->start, eb->len);
12039
12040         nritems = btrfs_header_nritems(eb);
12041         for (i = 0; i < nritems; i++) {
12042                 if (level == 0) {
12043                         btrfs_item_key_to_cpu(eb, &key, i);
12044                         if (key.type != BTRFS_ROOT_ITEM_KEY)
12045                                 continue;
12046                         /* Skip the extent root and reloc roots */
12047                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12048                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12049                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12050                                 continue;
12051                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12052                         bytenr = btrfs_disk_root_bytenr(eb, ri);
12053
12054                         /*
12055                          * If at any point we start needing the real root we
12056                          * will have to build a stump root for the root we are
12057                          * in, but for now this doesn't actually use the root so
12058                          * just pass in extent_root.
12059                          */
12060                         tmp = read_tree_block(fs_info, bytenr, 0);
12061                         if (!extent_buffer_uptodate(tmp)) {
12062                                 fprintf(stderr, "Error reading root block\n");
12063                                 return -EIO;
12064                         }
12065                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
12066                         free_extent_buffer(tmp);
12067                         if (ret)
12068                                 return ret;
12069                 } else {
12070                         bytenr = btrfs_node_blockptr(eb, i);
12071
12072                         /* If we aren't the tree root don't read the block */
12073                         if (level == 1 && !tree_root) {
12074                                 btrfs_pin_extent(fs_info, bytenr,
12075                                                 fs_info->nodesize);
12076                                 continue;
12077                         }
12078
12079                         tmp = read_tree_block(fs_info, bytenr, 0);
12080                         if (!extent_buffer_uptodate(tmp)) {
12081                                 fprintf(stderr, "Error reading tree block\n");
12082                                 return -EIO;
12083                         }
12084                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12085                         free_extent_buffer(tmp);
12086                         if (ret)
12087                                 return ret;
12088                 }
12089         }
12090
12091         return 0;
12092 }
12093
12094 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12095 {
12096         int ret;
12097
12098         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12099         if (ret)
12100                 return ret;
12101
12102         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
12103 }
12104
12105 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12106 {
12107         struct btrfs_block_group_cache *cache;
12108         struct btrfs_path path;
12109         struct extent_buffer *leaf;
12110         struct btrfs_chunk *chunk;
12111         struct btrfs_key key;
12112         int ret;
12113         u64 start;
12114
12115         btrfs_init_path(&path);
12116         key.objectid = 0;
12117         key.type = BTRFS_CHUNK_ITEM_KEY;
12118         key.offset = 0;
12119         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12120         if (ret < 0) {
12121                 btrfs_release_path(&path);
12122                 return ret;
12123         }
12124
12125         /*
12126          * We do this in case the block groups were screwed up and had alloc
12127          * bits that aren't actually set on the chunks.  This happens with
12128          * restored images every time and could happen in real life I guess.
12129          */
12130         fs_info->avail_data_alloc_bits = 0;
12131         fs_info->avail_metadata_alloc_bits = 0;
12132         fs_info->avail_system_alloc_bits = 0;
12133
12134         /* First we need to create the in-memory block groups */
12135         while (1) {
12136                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12137                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12138                         if (ret < 0) {
12139                                 btrfs_release_path(&path);
12140                                 return ret;
12141                         }
12142                         if (ret) {
12143                                 ret = 0;
12144                                 break;
12145                         }
12146                 }
12147                 leaf = path.nodes[0];
12148                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12149                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12150                         path.slots[0]++;
12151                         continue;
12152                 }
12153
12154                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12155                 btrfs_add_block_group(fs_info, 0,
12156                                       btrfs_chunk_type(leaf, chunk),
12157                                       key.objectid, key.offset,
12158                                       btrfs_chunk_length(leaf, chunk));
12159                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12160                                  key.offset + btrfs_chunk_length(leaf, chunk));
12161                 path.slots[0]++;
12162         }
12163         start = 0;
12164         while (1) {
12165                 cache = btrfs_lookup_first_block_group(fs_info, start);
12166                 if (!cache)
12167                         break;
12168                 cache->cached = 1;
12169                 start = cache->key.objectid + cache->key.offset;
12170         }
12171
12172         btrfs_release_path(&path);
12173         return 0;
12174 }
12175
12176 static int reset_balance(struct btrfs_trans_handle *trans,
12177                          struct btrfs_fs_info *fs_info)
12178 {
12179         struct btrfs_root *root = fs_info->tree_root;
12180         struct btrfs_path path;
12181         struct extent_buffer *leaf;
12182         struct btrfs_key key;
12183         int del_slot, del_nr = 0;
12184         int ret;
12185         int found = 0;
12186
12187         btrfs_init_path(&path);
12188         key.objectid = BTRFS_BALANCE_OBJECTID;
12189         key.type = BTRFS_BALANCE_ITEM_KEY;
12190         key.offset = 0;
12191         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12192         if (ret) {
12193                 if (ret > 0)
12194                         ret = 0;
12195                 if (!ret)
12196                         goto reinit_data_reloc;
12197                 else
12198                         goto out;
12199         }
12200
12201         ret = btrfs_del_item(trans, root, &path);
12202         if (ret)
12203                 goto out;
12204         btrfs_release_path(&path);
12205
12206         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12207         key.type = BTRFS_ROOT_ITEM_KEY;
12208         key.offset = 0;
12209         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12210         if (ret < 0)
12211                 goto out;
12212         while (1) {
12213                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12214                         if (!found)
12215                                 break;
12216
12217                         if (del_nr) {
12218                                 ret = btrfs_del_items(trans, root, &path,
12219                                                       del_slot, del_nr);
12220                                 del_nr = 0;
12221                                 if (ret)
12222                                         goto out;
12223                         }
12224                         key.offset++;
12225                         btrfs_release_path(&path);
12226
12227                         found = 0;
12228                         ret = btrfs_search_slot(trans, root, &key, &path,
12229                                                 -1, 1);
12230                         if (ret < 0)
12231                                 goto out;
12232                         continue;
12233                 }
12234                 found = 1;
12235                 leaf = path.nodes[0];
12236                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12237                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12238                         break;
12239                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12240                         path.slots[0]++;
12241                         continue;
12242                 }
12243                 if (!del_nr) {
12244                         del_slot = path.slots[0];
12245                         del_nr = 1;
12246                 } else {
12247                         del_nr++;
12248                 }
12249                 path.slots[0]++;
12250         }
12251
12252         if (del_nr) {
12253                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12254                 if (ret)
12255                         goto out;
12256         }
12257         btrfs_release_path(&path);
12258
12259 reinit_data_reloc:
12260         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12261         key.type = BTRFS_ROOT_ITEM_KEY;
12262         key.offset = (u64)-1;
12263         root = btrfs_read_fs_root(fs_info, &key);
12264         if (IS_ERR(root)) {
12265                 fprintf(stderr, "Error reading data reloc tree\n");
12266                 ret = PTR_ERR(root);
12267                 goto out;
12268         }
12269         record_root_in_trans(trans, root);
12270         ret = btrfs_fsck_reinit_root(trans, root, 0);
12271         if (ret)
12272                 goto out;
12273         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12274 out:
12275         btrfs_release_path(&path);
12276         return ret;
12277 }
12278
12279 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12280                               struct btrfs_fs_info *fs_info)
12281 {
12282         u64 start = 0;
12283         int ret;
12284
12285         /*
12286          * The only reason we don't do this is because right now we're just
12287          * walking the trees we find and pinning down their bytes, we don't look
12288          * at any of the leaves.  In order to do mixed groups we'd have to check
12289          * the leaves of any fs roots and pin down the bytes for any file
12290          * extents we find.  Not hard but why do it if we don't have to?
12291          */
12292         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12293                 fprintf(stderr, "We don't support re-initing the extent tree "
12294                         "for mixed block groups yet, please notify a btrfs "
12295                         "developer you want to do this so they can add this "
12296                         "functionality.\n");
12297                 return -EINVAL;
12298         }
12299
12300         /*
12301          * first we need to walk all of the trees except the extent tree and pin
12302          * down the bytes that are in use so we don't overwrite any existing
12303          * metadata.
12304          */
12305         ret = pin_metadata_blocks(fs_info);
12306         if (ret) {
12307                 fprintf(stderr, "error pinning down used bytes\n");
12308                 return ret;
12309         }
12310
12311         /*
12312          * Need to drop all the block groups since we're going to recreate all
12313          * of them again.
12314          */
12315         btrfs_free_block_groups(fs_info);
12316         ret = reset_block_groups(fs_info);
12317         if (ret) {
12318                 fprintf(stderr, "error resetting the block groups\n");
12319                 return ret;
12320         }
12321
12322         /* Ok we can allocate now, reinit the extent root */
12323         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12324         if (ret) {
12325                 fprintf(stderr, "extent root initialization failed\n");
12326                 /*
12327                  * When the transaction code is updated we should end the
12328                  * transaction, but for now progs only knows about commit so
12329                  * just return an error.
12330                  */
12331                 return ret;
12332         }
12333
12334         /*
12335          * Now we have all the in-memory block groups setup so we can make
12336          * allocations properly, and the metadata we care about is safe since we
12337          * pinned all of it above.
12338          */
12339         while (1) {
12340                 struct btrfs_block_group_cache *cache;
12341
12342                 cache = btrfs_lookup_first_block_group(fs_info, start);
12343                 if (!cache)
12344                         break;
12345                 start = cache->key.objectid + cache->key.offset;
12346                 ret = btrfs_insert_item(trans, fs_info->extent_root,
12347                                         &cache->key, &cache->item,
12348                                         sizeof(cache->item));
12349                 if (ret) {
12350                         fprintf(stderr, "Error adding block group\n");
12351                         return ret;
12352                 }
12353                 btrfs_extent_post_op(trans, fs_info->extent_root);
12354         }
12355
12356         ret = reset_balance(trans, fs_info);
12357         if (ret)
12358                 fprintf(stderr, "error resetting the pending balance\n");
12359
12360         return ret;
12361 }
12362
12363 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12364 {
12365         struct btrfs_path path;
12366         struct btrfs_trans_handle *trans;
12367         struct btrfs_key key;
12368         int ret;
12369
12370         printf("Recowing metadata block %llu\n", eb->start);
12371         key.objectid = btrfs_header_owner(eb);
12372         key.type = BTRFS_ROOT_ITEM_KEY;
12373         key.offset = (u64)-1;
12374
12375         root = btrfs_read_fs_root(root->fs_info, &key);
12376         if (IS_ERR(root)) {
12377                 fprintf(stderr, "Couldn't find owner root %llu\n",
12378                         key.objectid);
12379                 return PTR_ERR(root);
12380         }
12381
12382         trans = btrfs_start_transaction(root, 1);
12383         if (IS_ERR(trans))
12384                 return PTR_ERR(trans);
12385
12386         btrfs_init_path(&path);
12387         path.lowest_level = btrfs_header_level(eb);
12388         if (path.lowest_level)
12389                 btrfs_node_key_to_cpu(eb, &key, 0);
12390         else
12391                 btrfs_item_key_to_cpu(eb, &key, 0);
12392
12393         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12394         btrfs_commit_transaction(trans, root);
12395         btrfs_release_path(&path);
12396         return ret;
12397 }
12398
12399 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12400 {
12401         struct btrfs_path path;
12402         struct btrfs_trans_handle *trans;
12403         struct btrfs_key key;
12404         int ret;
12405
12406         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12407                bad->key.type, bad->key.offset);
12408         key.objectid = bad->root_id;
12409         key.type = BTRFS_ROOT_ITEM_KEY;
12410         key.offset = (u64)-1;
12411
12412         root = btrfs_read_fs_root(root->fs_info, &key);
12413         if (IS_ERR(root)) {
12414                 fprintf(stderr, "Couldn't find owner root %llu\n",
12415                         key.objectid);
12416                 return PTR_ERR(root);
12417         }
12418
12419         trans = btrfs_start_transaction(root, 1);
12420         if (IS_ERR(trans))
12421                 return PTR_ERR(trans);
12422
12423         btrfs_init_path(&path);
12424         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12425         if (ret) {
12426                 if (ret > 0)
12427                         ret = 0;
12428                 goto out;
12429         }
12430         ret = btrfs_del_item(trans, root, &path);
12431 out:
12432         btrfs_commit_transaction(trans, root);
12433         btrfs_release_path(&path);
12434         return ret;
12435 }
12436
12437 static int zero_log_tree(struct btrfs_root *root)
12438 {
12439         struct btrfs_trans_handle *trans;
12440         int ret;
12441
12442         trans = btrfs_start_transaction(root, 1);
12443         if (IS_ERR(trans)) {
12444                 ret = PTR_ERR(trans);
12445                 return ret;
12446         }
12447         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12448         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12449         ret = btrfs_commit_transaction(trans, root);
12450         return ret;
12451 }
12452
12453 static int populate_csum(struct btrfs_trans_handle *trans,
12454                          struct btrfs_root *csum_root, char *buf, u64 start,
12455                          u64 len)
12456 {
12457         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12458         u64 offset = 0;
12459         u64 sectorsize;
12460         int ret = 0;
12461
12462         while (offset < len) {
12463                 sectorsize = fs_info->sectorsize;
12464                 ret = read_extent_data(fs_info, buf, start + offset,
12465                                        &sectorsize, 0);
12466                 if (ret)
12467                         break;
12468                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12469                                             start + offset, buf, sectorsize);
12470                 if (ret)
12471                         break;
12472                 offset += sectorsize;
12473         }
12474         return ret;
12475 }
12476
12477 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12478                                       struct btrfs_root *csum_root,
12479                                       struct btrfs_root *cur_root)
12480 {
12481         struct btrfs_path path;
12482         struct btrfs_key key;
12483         struct extent_buffer *node;
12484         struct btrfs_file_extent_item *fi;
12485         char *buf = NULL;
12486         u64 start = 0;
12487         u64 len = 0;
12488         int slot = 0;
12489         int ret = 0;
12490
12491         buf = malloc(cur_root->fs_info->sectorsize);
12492         if (!buf)
12493                 return -ENOMEM;
12494
12495         btrfs_init_path(&path);
12496         key.objectid = 0;
12497         key.offset = 0;
12498         key.type = 0;
12499         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12500         if (ret < 0)
12501                 goto out;
12502         /* Iterate all regular file extents and fill its csum */
12503         while (1) {
12504                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12505
12506                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12507                         goto next;
12508                 node = path.nodes[0];
12509                 slot = path.slots[0];
12510                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12511                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12512                         goto next;
12513                 start = btrfs_file_extent_disk_bytenr(node, fi);
12514                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12515
12516                 ret = populate_csum(trans, csum_root, buf, start, len);
12517                 if (ret == -EEXIST)
12518                         ret = 0;
12519                 if (ret < 0)
12520                         goto out;
12521 next:
12522                 /*
12523                  * TODO: if next leaf is corrupted, jump to nearest next valid
12524                  * leaf.
12525                  */
12526                 ret = btrfs_next_item(cur_root, &path);
12527                 if (ret < 0)
12528                         goto out;
12529                 if (ret > 0) {
12530                         ret = 0;
12531                         goto out;
12532                 }
12533         }
12534
12535 out:
12536         btrfs_release_path(&path);
12537         free(buf);
12538         return ret;
12539 }
12540
12541 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12542                                   struct btrfs_root *csum_root)
12543 {
12544         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12545         struct btrfs_path path;
12546         struct btrfs_root *tree_root = fs_info->tree_root;
12547         struct btrfs_root *cur_root;
12548         struct extent_buffer *node;
12549         struct btrfs_key key;
12550         int slot = 0;
12551         int ret = 0;
12552
12553         btrfs_init_path(&path);
12554         key.objectid = BTRFS_FS_TREE_OBJECTID;
12555         key.offset = 0;
12556         key.type = BTRFS_ROOT_ITEM_KEY;
12557         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12558         if (ret < 0)
12559                 goto out;
12560         if (ret > 0) {
12561                 ret = -ENOENT;
12562                 goto out;
12563         }
12564
12565         while (1) {
12566                 node = path.nodes[0];
12567                 slot = path.slots[0];
12568                 btrfs_item_key_to_cpu(node, &key, slot);
12569                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12570                         goto out;
12571                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12572                         goto next;
12573                 if (!is_fstree(key.objectid))
12574                         goto next;
12575                 key.offset = (u64)-1;
12576
12577                 cur_root = btrfs_read_fs_root(fs_info, &key);
12578                 if (IS_ERR(cur_root) || !cur_root) {
12579                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12580                                 key.objectid);
12581                         goto out;
12582                 }
12583                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12584                                 cur_root);
12585                 if (ret < 0)
12586                         goto out;
12587 next:
12588                 ret = btrfs_next_item(tree_root, &path);
12589                 if (ret > 0) {
12590                         ret = 0;
12591                         goto out;
12592                 }
12593                 if (ret < 0)
12594                         goto out;
12595         }
12596
12597 out:
12598         btrfs_release_path(&path);
12599         return ret;
12600 }
12601
12602 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12603                                       struct btrfs_root *csum_root)
12604 {
12605         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12606         struct btrfs_path path;
12607         struct btrfs_extent_item *ei;
12608         struct extent_buffer *leaf;
12609         char *buf;
12610         struct btrfs_key key;
12611         int ret;
12612
12613         btrfs_init_path(&path);
12614         key.objectid = 0;
12615         key.type = BTRFS_EXTENT_ITEM_KEY;
12616         key.offset = 0;
12617         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12618         if (ret < 0) {
12619                 btrfs_release_path(&path);
12620                 return ret;
12621         }
12622
12623         buf = malloc(csum_root->fs_info->sectorsize);
12624         if (!buf) {
12625                 btrfs_release_path(&path);
12626                 return -ENOMEM;
12627         }
12628
12629         while (1) {
12630                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12631                         ret = btrfs_next_leaf(extent_root, &path);
12632                         if (ret < 0)
12633                                 break;
12634                         if (ret) {
12635                                 ret = 0;
12636                                 break;
12637                         }
12638                 }
12639                 leaf = path.nodes[0];
12640
12641                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12642                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12643                         path.slots[0]++;
12644                         continue;
12645                 }
12646
12647                 ei = btrfs_item_ptr(leaf, path.slots[0],
12648                                     struct btrfs_extent_item);
12649                 if (!(btrfs_extent_flags(leaf, ei) &
12650                       BTRFS_EXTENT_FLAG_DATA)) {
12651                         path.slots[0]++;
12652                         continue;
12653                 }
12654
12655                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12656                                     key.offset);
12657                 if (ret)
12658                         break;
12659                 path.slots[0]++;
12660         }
12661
12662         btrfs_release_path(&path);
12663         free(buf);
12664         return ret;
12665 }
12666
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * An extent tree init wipes all extent info, so in that case the extent tree
 * cannot be used as the source and the fs/subvolume trees are walked instead.
 * A non-zero @search_fs_tree selects the fs/subvolume trees; zero selects the
 * extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
                              : fill_csum_tree_from_extent(trans, csum_root);
}
12683
12684 static void free_roots_info_cache(void)
12685 {
12686         if (!roots_info_cache)
12687                 return;
12688
12689         while (!cache_tree_empty(roots_info_cache)) {
12690                 struct cache_extent *entry;
12691                 struct root_item_info *rii;
12692
12693                 entry = first_cache_extent(roots_info_cache);
12694                 if (!entry)
12695                         break;
12696                 remove_cache_extent(roots_info_cache, entry);
12697                 rii = container_of(entry, struct root_item_info, cache_extent);
12698                 free(rii);
12699         }
12700
12701         free(roots_info_cache);
12702         roots_info_cache = NULL;
12703 }
12704
12705 static int build_roots_info_cache(struct btrfs_fs_info *info)
12706 {
12707         int ret = 0;
12708         struct btrfs_key key;
12709         struct extent_buffer *leaf;
12710         struct btrfs_path path;
12711
12712         if (!roots_info_cache) {
12713                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12714                 if (!roots_info_cache)
12715                         return -ENOMEM;
12716                 cache_tree_init(roots_info_cache);
12717         }
12718
12719         btrfs_init_path(&path);
12720         key.objectid = 0;
12721         key.type = BTRFS_EXTENT_ITEM_KEY;
12722         key.offset = 0;
12723         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12724         if (ret < 0)
12725                 goto out;
12726         leaf = path.nodes[0];
12727
12728         while (1) {
12729                 struct btrfs_key found_key;
12730                 struct btrfs_extent_item *ei;
12731                 struct btrfs_extent_inline_ref *iref;
12732                 int slot = path.slots[0];
12733                 int type;
12734                 u64 flags;
12735                 u64 root_id;
12736                 u8 level;
12737                 struct cache_extent *entry;
12738                 struct root_item_info *rii;
12739
12740                 if (slot >= btrfs_header_nritems(leaf)) {
12741                         ret = btrfs_next_leaf(info->extent_root, &path);
12742                         if (ret < 0) {
12743                                 break;
12744                         } else if (ret) {
12745                                 ret = 0;
12746                                 break;
12747                         }
12748                         leaf = path.nodes[0];
12749                         slot = path.slots[0];
12750                 }
12751
12752                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12753
12754                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12755                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12756                         goto next;
12757
12758                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12759                 flags = btrfs_extent_flags(leaf, ei);
12760
12761                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12762                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12763                         goto next;
12764
12765                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12766                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12767                         level = found_key.offset;
12768                 } else {
12769                         struct btrfs_tree_block_info *binfo;
12770
12771                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12772                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12773                         level = btrfs_tree_block_level(leaf, binfo);
12774                 }
12775
12776                 /*
12777                  * For a root extent, it must be of the following type and the
12778                  * first (and only one) iref in the item.
12779                  */
12780                 type = btrfs_extent_inline_ref_type(leaf, iref);
12781                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12782                         goto next;
12783
12784                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12785                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12786                 if (!entry) {
12787                         rii = malloc(sizeof(struct root_item_info));
12788                         if (!rii) {
12789                                 ret = -ENOMEM;
12790                                 goto out;
12791                         }
12792                         rii->cache_extent.start = root_id;
12793                         rii->cache_extent.size = 1;
12794                         rii->level = (u8)-1;
12795                         entry = &rii->cache_extent;
12796                         ret = insert_cache_extent(roots_info_cache, entry);
12797                         ASSERT(ret == 0);
12798                 } else {
12799                         rii = container_of(entry, struct root_item_info,
12800                                            cache_extent);
12801                 }
12802
12803                 ASSERT(rii->cache_extent.start == root_id);
12804                 ASSERT(rii->cache_extent.size == 1);
12805
12806                 if (level > rii->level || rii->level == (u8)-1) {
12807                         rii->level = level;
12808                         rii->bytenr = found_key.objectid;
12809                         rii->gen = btrfs_extent_generation(leaf, ei);
12810                         rii->node_count = 1;
12811                 } else if (level == rii->level) {
12812                         rii->node_count++;
12813                 }
12814 next:
12815                 path.slots[0]++;
12816         }
12817
12818 out:
12819         btrfs_release_path(&path);
12820
12821         return ret;
12822 }
12823
12824 static int maybe_repair_root_item(struct btrfs_path *path,
12825                                   const struct btrfs_key *root_key,
12826                                   const int read_only_mode)
12827 {
12828         const u64 root_id = root_key->objectid;
12829         struct cache_extent *entry;
12830         struct root_item_info *rii;
12831         struct btrfs_root_item ri;
12832         unsigned long offset;
12833
12834         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12835         if (!entry) {
12836                 fprintf(stderr,
12837                         "Error: could not find extent items for root %llu\n",
12838                         root_key->objectid);
12839                 return -ENOENT;
12840         }
12841
12842         rii = container_of(entry, struct root_item_info, cache_extent);
12843         ASSERT(rii->cache_extent.start == root_id);
12844         ASSERT(rii->cache_extent.size == 1);
12845
12846         if (rii->node_count != 1) {
12847                 fprintf(stderr,
12848                         "Error: could not find btree root extent for root %llu\n",
12849                         root_id);
12850                 return -ENOENT;
12851         }
12852
12853         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12854         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12855
12856         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12857             btrfs_root_level(&ri) != rii->level ||
12858             btrfs_root_generation(&ri) != rii->gen) {
12859
12860                 /*
12861                  * If we're in repair mode but our caller told us to not update
12862                  * the root item, i.e. just check if it needs to be updated, don't
12863                  * print this message, since the caller will call us again shortly
12864                  * for the same root item without read only mode (the caller will
12865                  * open a transaction first).
12866                  */
12867                 if (!(read_only_mode && repair))
12868                         fprintf(stderr,
12869                                 "%sroot item for root %llu,"
12870                                 " current bytenr %llu, current gen %llu, current level %u,"
12871                                 " new bytenr %llu, new gen %llu, new level %u\n",
12872                                 (read_only_mode ? "" : "fixing "),
12873                                 root_id,
12874                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12875                                 btrfs_root_level(&ri),
12876                                 rii->bytenr, rii->gen, rii->level);
12877
12878                 if (btrfs_root_generation(&ri) > rii->gen) {
12879                         fprintf(stderr,
12880                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12881                                 root_id, btrfs_root_generation(&ri), rii->gen);
12882                         return -EINVAL;
12883                 }
12884
12885                 if (!read_only_mode) {
12886                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12887                         btrfs_set_root_level(&ri, rii->level);
12888                         btrfs_set_root_generation(&ri, rii->gen);
12889                         write_extent_buffer(path->nodes[0], &ri,
12890                                             offset, sizeof(ri));
12891                 }
12892
12893                 return 1;
12894         }
12895
12896         return 0;
12897 }
12898
12899 /*
12900  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12901  * caused read-only snapshots to be corrupted if they were created at a moment
12902  * when the source subvolume/snapshot had orphan items. The issue was that the
12903  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12904  * node instead of the post orphan cleanup root node.
12905  * So this function, and its callees, just detects and fixes those cases. Even
12906  * though the regression was for read-only snapshots, this function applies to
12907  * any snapshot/subvolume root.
12908  * This must be run before any other repair code - not doing it so, makes other
12909  * repair code delete or modify backrefs in the extent tree for example, which
12910  * will result in an inconsistent fs after repairing the root items.
12911  */
12912 static int repair_root_items(struct btrfs_fs_info *info)
12913 {
12914         struct btrfs_path path;
12915         struct btrfs_key key;
12916         struct extent_buffer *leaf;
12917         struct btrfs_trans_handle *trans = NULL;
12918         int ret = 0;
12919         int bad_roots = 0;
12920         int need_trans = 0;
12921
12922         btrfs_init_path(&path);
12923
12924         ret = build_roots_info_cache(info);
12925         if (ret)
12926                 goto out;
12927
12928         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12929         key.type = BTRFS_ROOT_ITEM_KEY;
12930         key.offset = 0;
12931
12932 again:
12933         /*
12934          * Avoid opening and committing transactions if a leaf doesn't have
12935          * any root items that need to be fixed, so that we avoid rotating
12936          * backup roots unnecessarily.
12937          */
12938         if (need_trans) {
12939                 trans = btrfs_start_transaction(info->tree_root, 1);
12940                 if (IS_ERR(trans)) {
12941                         ret = PTR_ERR(trans);
12942                         goto out;
12943                 }
12944         }
12945
12946         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12947                                 0, trans ? 1 : 0);
12948         if (ret < 0)
12949                 goto out;
12950         leaf = path.nodes[0];
12951
12952         while (1) {
12953                 struct btrfs_key found_key;
12954
12955                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12956                         int no_more_keys = find_next_key(&path, &key);
12957
12958                         btrfs_release_path(&path);
12959                         if (trans) {
12960                                 ret = btrfs_commit_transaction(trans,
12961                                                                info->tree_root);
12962                                 trans = NULL;
12963                                 if (ret < 0)
12964                                         goto out;
12965                         }
12966                         need_trans = 0;
12967                         if (no_more_keys)
12968                                 break;
12969                         goto again;
12970                 }
12971
12972                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12973
12974                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12975                         goto next;
12976                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12977                         goto next;
12978
12979                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12980                 if (ret < 0)
12981                         goto out;
12982                 if (ret) {
12983                         if (!trans && repair) {
12984                                 need_trans = 1;
12985                                 key = found_key;
12986                                 btrfs_release_path(&path);
12987                                 goto again;
12988                         }
12989                         bad_roots++;
12990                 }
12991 next:
12992                 path.slots[0]++;
12993         }
12994         ret = 0;
12995 out:
12996         free_roots_info_cache();
12997         btrfs_release_path(&path);
12998         if (trans)
12999                 btrfs_commit_transaction(trans, info->tree_root);
13000         if (ret < 0)
13001                 return ret;
13002
13003         return bad_roots;
13004 }
13005
13006 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13007 {
13008         struct btrfs_trans_handle *trans;
13009         struct btrfs_block_group_cache *bg_cache;
13010         u64 current = 0;
13011         int ret = 0;
13012
13013         /* Clear all free space cache inodes and its extent data */
13014         while (1) {
13015                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13016                 if (!bg_cache)
13017                         break;
13018                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13019                 if (ret < 0)
13020                         return ret;
13021                 current = bg_cache->key.objectid + bg_cache->key.offset;
13022         }
13023
13024         /* Don't forget to set cache_generation to -1 */
13025         trans = btrfs_start_transaction(fs_info->tree_root, 0);
13026         if (IS_ERR(trans)) {
13027                 error("failed to update super block cache generation");
13028                 return PTR_ERR(trans);
13029         }
13030         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13031         btrfs_commit_transaction(trans, fs_info->tree_root);
13032
13033         return ret;
13034 }
13035
13036 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13037                 int clear_version)
13038 {
13039         int ret = 0;
13040
13041         if (clear_version == 1) {
13042                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13043                         error(
13044                 "free space cache v2 detected, use --clear-space-cache v2");
13045                         ret = 1;
13046                         goto close_out;
13047                 }
13048                 printf("Clearing free space cache\n");
13049                 ret = clear_free_space_cache(fs_info);
13050                 if (ret) {
13051                         error("failed to clear free space cache");
13052                         ret = 1;
13053                 } else {
13054                         printf("Free space cache cleared\n");
13055                 }
13056         } else if (clear_version == 2) {
13057                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13058                         printf("no free space cache v2 to clear\n");
13059                         ret = 0;
13060                         goto close_out;
13061                 }
13062                 printf("Clear free space cache v2\n");
13063                 ret = btrfs_clear_free_space_tree(fs_info);
13064                 if (ret) {
13065                         error("failed to clear free space cache v2: %d", ret);
13066                         ret = 1;
13067                 } else {
13068                         printf("free space cache v2 cleared\n");
13069                 }
13070         }
13071 close_out:
13072         return ret;
13073 }
13074
/*
 * Help text for "btrfs check"; cmd_check() passes this table to usage().
 * The table is terminated by a NULL entry.
 */
const char * const cmd_check_usage[] = {
	/* Synopsis and description */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* Options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--force                     skip mount checks, repair is not possible",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",

	/* Terminator */
	NULL
};
13106
13107 int cmd_check(int argc, char **argv)
13108 {
13109         struct cache_tree root_cache;
13110         struct btrfs_root *root;
13111         struct btrfs_fs_info *info;
13112         u64 bytenr = 0;
13113         u64 subvolid = 0;
13114         u64 tree_root_bytenr = 0;
13115         u64 chunk_root_bytenr = 0;
13116         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13117         int ret = 0;
13118         int err = 0;
13119         u64 num;
13120         int init_csum_tree = 0;
13121         int readonly = 0;
13122         int clear_space_cache = 0;
13123         int qgroup_report = 0;
13124         int qgroups_repaired = 0;
13125         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13126         int force = 0;
13127
13128         while(1) {
13129                 int c;
13130                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13131                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13132                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13133                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13134                         GETOPT_VAL_FORCE };
13135                 static const struct option long_options[] = {
13136                         { "super", required_argument, NULL, 's' },
13137                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13138                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13139                         { "init-csum-tree", no_argument, NULL,
13140                                 GETOPT_VAL_INIT_CSUM },
13141                         { "init-extent-tree", no_argument, NULL,
13142                                 GETOPT_VAL_INIT_EXTENT },
13143                         { "check-data-csum", no_argument, NULL,
13144                                 GETOPT_VAL_CHECK_CSUM },
13145                         { "backup", no_argument, NULL, 'b' },
13146                         { "subvol-extents", required_argument, NULL, 'E' },
13147                         { "qgroup-report", no_argument, NULL, 'Q' },
13148                         { "tree-root", required_argument, NULL, 'r' },
13149                         { "chunk-root", required_argument, NULL,
13150                                 GETOPT_VAL_CHUNK_TREE },
13151                         { "progress", no_argument, NULL, 'p' },
13152                         { "mode", required_argument, NULL,
13153                                 GETOPT_VAL_MODE },
13154                         { "clear-space-cache", required_argument, NULL,
13155                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
13156                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13157                         { NULL, 0, NULL, 0}
13158                 };
13159
13160                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13161                 if (c < 0)
13162                         break;
13163                 switch(c) {
13164                         case 'a': /* ignored */ break;
13165                         case 'b':
13166                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13167                                 break;
13168                         case 's':
13169                                 num = arg_strtou64(optarg);
13170                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13171                                         error(
13172                                         "super mirror should be less than %d",
13173                                                 BTRFS_SUPER_MIRROR_MAX);
13174                                         exit(1);
13175                                 }
13176                                 bytenr = btrfs_sb_offset(((int)num));
13177                                 printf("using SB copy %llu, bytenr %llu\n", num,
13178                                        (unsigned long long)bytenr);
13179                                 break;
13180                         case 'Q':
13181                                 qgroup_report = 1;
13182                                 break;
13183                         case 'E':
13184                                 subvolid = arg_strtou64(optarg);
13185                                 break;
13186                         case 'r':
13187                                 tree_root_bytenr = arg_strtou64(optarg);
13188                                 break;
13189                         case GETOPT_VAL_CHUNK_TREE:
13190                                 chunk_root_bytenr = arg_strtou64(optarg);
13191                                 break;
13192                         case 'p':
13193                                 ctx.progress_enabled = true;
13194                                 break;
13195                         case '?':
13196                         case 'h':
13197                                 usage(cmd_check_usage);
13198                         case GETOPT_VAL_REPAIR:
13199                                 printf("enabling repair mode\n");
13200                                 repair = 1;
13201                                 ctree_flags |= OPEN_CTREE_WRITES;
13202                                 break;
13203                         case GETOPT_VAL_READONLY:
13204                                 readonly = 1;
13205                                 break;
13206                         case GETOPT_VAL_INIT_CSUM:
13207                                 printf("Creating a new CRC tree\n");
13208                                 init_csum_tree = 1;
13209                                 repair = 1;
13210                                 ctree_flags |= OPEN_CTREE_WRITES;
13211                                 break;
13212                         case GETOPT_VAL_INIT_EXTENT:
13213                                 init_extent_tree = 1;
13214                                 ctree_flags |= (OPEN_CTREE_WRITES |
13215                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
13216                                 repair = 1;
13217                                 break;
13218                         case GETOPT_VAL_CHECK_CSUM:
13219                                 check_data_csum = 1;
13220                                 break;
13221                         case GETOPT_VAL_MODE:
13222                                 check_mode = parse_check_mode(optarg);
13223                                 if (check_mode == CHECK_MODE_UNKNOWN) {
13224                                         error("unknown mode: %s", optarg);
13225                                         exit(1);
13226                                 }
13227                                 break;
13228                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
13229                                 if (strcmp(optarg, "v1") == 0) {
13230                                         clear_space_cache = 1;
13231                                 } else if (strcmp(optarg, "v2") == 0) {
13232                                         clear_space_cache = 2;
13233                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13234                                 } else {
13235                                         error(
13236                 "invalid argument to --clear-space-cache, must be v1 or v2");
13237                                         exit(1);
13238                                 }
13239                                 ctree_flags |= OPEN_CTREE_WRITES;
13240                                 break;
13241                         case GETOPT_VAL_FORCE:
13242                                 force = 1;
13243                                 break;
13244                 }
13245         }
13246
13247         if (check_argc_exact(argc - optind, 1))
13248                 usage(cmd_check_usage);
13249
13250         if (ctx.progress_enabled) {
13251                 ctx.tp = TASK_NOTHING;
13252                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13253         }
13254
13255         /* This check is the only reason for --readonly to exist */
13256         if (readonly && repair) {
13257                 error("repair options are not compatible with --readonly");
13258                 exit(1);
13259         }
13260
13261         /*
13262          * experimental and dangerous
13263          */
13264         if (repair && check_mode == CHECK_MODE_LOWMEM)
13265                 warning("low-memory mode repair support is only partial");
13266
13267         radix_tree_init();
13268         cache_tree_init(&root_cache);
13269
13270         ret = check_mounted(argv[optind]);
13271         if (!force) {
13272                 if (ret < 0) {
13273                         error("could not check mount status: %s",
13274                                         strerror(-ret));
13275                         err |= !!ret;
13276                         goto err_out;
13277                 } else if (ret) {
13278                         error(
13279 "%s is currently mounted, use --force if you really intend to check the filesystem",
13280                                 argv[optind]);
13281                         ret = -EBUSY;
13282                         err |= !!ret;
13283                         goto err_out;
13284                 }
13285         } else {
13286                 if (repair) {
13287                         error("repair and --force is not yet supported");
13288                         ret = 1;
13289                         err |= !!ret;
13290                         goto err_out;
13291                 }
13292                 if (ret < 0) {
13293                         warning(
13294 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13295                                 argv[optind]);
13296                 } else if (ret) {
13297                         warning(
13298                         "filesystem mounted, continuing because of --force");
13299                 }
13300                 /* A block device is mounted in exclusive mode by kernel */
13301                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13302         }
13303
13304         /* only allow partial opening under repair mode */
13305         if (repair)
13306                 ctree_flags |= OPEN_CTREE_PARTIAL;
13307
13308         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13309                                   chunk_root_bytenr, ctree_flags);
13310         if (!info) {
13311                 error("cannot open file system");
13312                 ret = -EIO;
13313                 err |= !!ret;
13314                 goto err_out;
13315         }
13316
13317         global_info = info;
13318         root = info->fs_root;
13319         uuid_unparse(info->super_copy->fsid, uuidbuf);
13320
13321         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13322
13323         /*
13324          * Check the bare minimum before starting anything else that could rely
13325          * on it, namely the tree roots, any local consistency checks
13326          */
13327         if (!extent_buffer_uptodate(info->tree_root->node) ||
13328             !extent_buffer_uptodate(info->dev_root->node) ||
13329             !extent_buffer_uptodate(info->chunk_root->node)) {
13330                 error("critical roots corrupted, unable to check the filesystem");
13331                 err |= !!ret;
13332                 ret = -EIO;
13333                 goto close_out;
13334         }
13335
13336         if (clear_space_cache) {
13337                 ret = do_clear_free_space_cache(info, clear_space_cache);
13338                 err |= !!ret;
13339                 goto close_out;
13340         }
13341
13342         /*
13343          * repair mode will force us to commit transaction which
13344          * will make us fail to load log tree when mounting.
13345          */
13346         if (repair && btrfs_super_log_root(info->super_copy)) {
13347                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13348                 if (!ret) {
13349                         ret = 1;
13350                         err |= !!ret;
13351                         goto close_out;
13352                 }
13353                 ret = zero_log_tree(root);
13354                 err |= !!ret;
13355                 if (ret) {
13356                         error("failed to zero log tree: %d", ret);
13357                         goto close_out;
13358                 }
13359         }
13360
13361         if (qgroup_report) {
13362                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13363                        uuidbuf);
13364                 ret = qgroup_verify_all(info);
13365                 err |= !!ret;
13366                 if (ret == 0)
13367                         report_qgroups(1);
13368                 goto close_out;
13369         }
13370         if (subvolid) {
13371                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13372                        subvolid, argv[optind], uuidbuf);
13373                 ret = print_extent_state(info, subvolid);
13374                 err |= !!ret;
13375                 goto close_out;
13376         }
13377
13378         if (init_extent_tree || init_csum_tree) {
13379                 struct btrfs_trans_handle *trans;
13380
13381                 trans = btrfs_start_transaction(info->extent_root, 0);
13382                 if (IS_ERR(trans)) {
13383                         error("error starting transaction");
13384                         ret = PTR_ERR(trans);
13385                         err |= !!ret;
13386                         goto close_out;
13387                 }
13388
13389                 if (init_extent_tree) {
13390                         printf("Creating a new extent tree\n");
13391                         ret = reinit_extent_tree(trans, info);
13392                         err |= !!ret;
13393                         if (ret)
13394                                 goto close_out;
13395                 }
13396
13397                 if (init_csum_tree) {
13398                         printf("Reinitialize checksum tree\n");
13399                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13400                         if (ret) {
13401                                 error("checksum tree initialization failed: %d",
13402                                                 ret);
13403                                 ret = -EIO;
13404                                 err |= !!ret;
13405                                 goto close_out;
13406                         }
13407
13408                         ret = fill_csum_tree(trans, info->csum_root,
13409                                              init_extent_tree);
13410                         err |= !!ret;
13411                         if (ret) {
13412                                 error("checksum tree refilling failed: %d", ret);
13413                                 return -EIO;
13414                         }
13415                 }
13416                 /*
13417                  * Ok now we commit and run the normal fsck, which will add
13418                  * extent entries for all of the items it finds.
13419                  */
13420                 ret = btrfs_commit_transaction(trans, info->extent_root);
13421                 err |= !!ret;
13422                 if (ret)
13423                         goto close_out;
13424         }
13425         if (!extent_buffer_uptodate(info->extent_root->node)) {
13426                 error("critical: extent_root, unable to check the filesystem");
13427                 ret = -EIO;
13428                 err |= !!ret;
13429                 goto close_out;
13430         }
13431         if (!extent_buffer_uptodate(info->csum_root->node)) {
13432                 error("critical: csum_root, unable to check the filesystem");
13433                 ret = -EIO;
13434                 err |= !!ret;
13435                 goto close_out;
13436         }
13437
13438         ret = do_check_chunks_and_extents(info);
13439         err |= !!ret;
13440         if (ret)
13441                 error(
13442                 "errors found in extent allocation tree or chunk allocation");
13443
13444         ret = repair_root_items(info);
13445         err |= !!ret;
13446         if (ret < 0) {
13447                 error("failed to repair root items: %s", strerror(-ret));
13448                 goto close_out;
13449         }
13450         if (repair) {
13451                 fprintf(stderr, "Fixed %d roots.\n", ret);
13452                 ret = 0;
13453         } else if (ret > 0) {
13454                 fprintf(stderr,
13455                        "Found %d roots with an outdated root item.\n",
13456                        ret);
13457                 fprintf(stderr,
13458                         "Please run a filesystem check with the option --repair to fix them.\n");
13459                 ret = 1;
13460                 err |= !!ret;
13461                 goto close_out;
13462         }
13463
13464         if (!ctx.progress_enabled) {
13465                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13466                         fprintf(stderr, "checking free space tree\n");
13467                 else
13468                         fprintf(stderr, "checking free space cache\n");
13469         }
13470         ret = check_space_cache(root);
13471         err |= !!ret;
13472         if (ret) {
13473                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13474                         error("errors found in free space tree");
13475                 else
13476                         error("errors found in free space cache");
13477                 goto out;
13478         }
13479
13480         /*
13481          * We used to have to have these hole extents in between our real
13482          * extents so if we don't have this flag set we need to make sure there
13483          * are no gaps in the file extents for inodes, otherwise we can just
13484          * ignore it when this happens.
13485          */
13486         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13487         ret = do_check_fs_roots(info, &root_cache);
13488         err |= !!ret;
13489         if (ret) {
13490                 error("errors found in fs roots");
13491                 goto out;
13492         }
13493
13494         fprintf(stderr, "checking csums\n");
13495         ret = check_csums(root);
13496         err |= !!ret;
13497         if (ret) {
13498                 error("errors found in csum tree");
13499                 goto out;
13500         }
13501
13502         fprintf(stderr, "checking root refs\n");
13503         /* For low memory mode, check_fs_roots_v2 handles root refs */
13504         if (check_mode != CHECK_MODE_LOWMEM) {
13505                 ret = check_root_refs(root, &root_cache);
13506                 err |= !!ret;
13507                 if (ret) {
13508                         error("errors found in root refs");
13509                         goto out;
13510                 }
13511         }
13512
13513         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13514                 struct extent_buffer *eb;
13515
13516                 eb = list_first_entry(&root->fs_info->recow_ebs,
13517                                       struct extent_buffer, recow);
13518                 list_del_init(&eb->recow);
13519                 ret = recow_extent_buffer(root, eb);
13520                 err |= !!ret;
13521                 if (ret) {
13522                         error("fails to fix transid errors");
13523                         break;
13524                 }
13525         }
13526
13527         while (!list_empty(&delete_items)) {
13528                 struct bad_item *bad;
13529
13530                 bad = list_first_entry(&delete_items, struct bad_item, list);
13531                 list_del_init(&bad->list);
13532                 if (repair) {
13533                         ret = delete_bad_item(root, bad);
13534                         err |= !!ret;
13535                 }
13536                 free(bad);
13537         }
13538
13539         if (info->quota_enabled) {
13540                 fprintf(stderr, "checking quota groups\n");
13541                 ret = qgroup_verify_all(info);
13542                 err |= !!ret;
13543                 if (ret) {
13544                         error("failed to check quota groups");
13545                         goto out;
13546                 }
13547                 report_qgroups(0);
13548                 ret = repair_qgroups(info, &qgroups_repaired);
13549                 err |= !!ret;
13550                 if (err) {
13551                         error("failed to repair quota groups");
13552                         goto out;
13553                 }
13554                 ret = 0;
13555         }
13556
13557         if (!list_empty(&root->fs_info->recow_ebs)) {
13558                 error("transid errors in file system");
13559                 ret = 1;
13560                 err |= !!ret;
13561         }
13562 out:
13563         printf("found %llu bytes used, ",
13564                (unsigned long long)bytes_used);
13565         if (err)
13566                 printf("error(s) found\n");
13567         else
13568                 printf("no error found\n");
13569         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13570         printf("total tree bytes: %llu\n",
13571                (unsigned long long)total_btree_bytes);
13572         printf("total fs tree bytes: %llu\n",
13573                (unsigned long long)total_fs_tree_bytes);
13574         printf("total extent tree bytes: %llu\n",
13575                (unsigned long long)total_extent_tree_bytes);
13576         printf("btree space waste bytes: %llu\n",
13577                (unsigned long long)btree_space_waste);
13578         printf("file data blocks allocated: %llu\n referenced %llu\n",
13579                 (unsigned long long)data_bytes_allocated,
13580                 (unsigned long long)data_bytes_referenced);
13581
13582         free_qgroup_counts();
13583         free_root_recs_tree(&root_cache);
13584 close_out:
13585         close_ctree(root);
13586 err_out:
13587         if (ctx.progress_enabled)
13588                 task_deinit(ctx.info);
13589
13590         return err;
13591 }