btrfs-progs: check: introduce repair_fs_first_inode()
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Stage of the check that the progress indicator reports.
 *
 * Every value below TASK_NOTHING is used as an index into the
 * task_position_string[] table in print_status_check(); TASK_NOTHING makes
 * that function return without printing anything.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};
53
/*
 * State handed to the progress callbacks; print_status_check() receives it
 * as its opaque argument.
 */
struct task_ctx {
	int progress_enabled;
	/* stage currently shown by print_status_check() */
	enum task_position tp;

	/* handle passed to task_period_start()/task_period_wait() */
	struct task_info *info;
};
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Which checker implementation to run.
 *
 * parse_check_mode() maps "lowmem" to CHECK_MODE_LOWMEM, "orig" and
 * "original" to CHECK_MODE_ORIGINAL, and any other string to
 * CHECK_MODE_UNKNOWN.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default, i.e. the original mode */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header of a backref.  It is embedded as the member "node" of both
 * struct data_backref and struct tree_backref and is ordered in an rb-tree
 * by compare_extent_backref().
 */
struct extent_backref {
	struct rb_node node;
	/* 1 for a data_backref, 0 for a tree_backref */
	unsigned int is_data:1;
	unsigned int found_extent_tree:1;
	/* when set, the containing backref is keyed by parent rather than root */
	unsigned int full_backref:1;
	unsigned int found_ref:1;
	unsigned int broken:1;
};
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
/*
 * Backref of a data extent.  compare_data_backref() orders these by
 * parent/root, then owner and offset, and finally disk_bytenr and bytes
 * once found_ref is non-zero on both sides.
 */
struct data_backref {
	struct extent_backref node;
	/* parent and root share storage, see compare_data_backref() */
	union {
		u64 parent;
		u64 root;
	};
	u64 owner;
	u64 offset;
	u64 disk_bytenr;
	u64 bytes;
	u64 ram_bytes;
	u32 num_refs;
	/* counter; distinct from the one-bit node.found_ref flag */
	u32 found_ref;
};
115
/*
 * Error bits for fs tree items.  Numbering starts at bit 1; bit 0 is not
 * used by this set.
 */
#define ROOT_DIR_ERROR		(1<<1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1<<2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1<<3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1<<4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1<<5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1<<6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1<<7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1<<8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1<<9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1<<10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1<<11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1<<12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1<<13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1<<14)	/* no inode_item */
#define LAST_ITEM		(1<<15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1<<16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1<<17)	/* ROOT_REF found but not match */
#define DIR_INDEX_MISSING	(1<<18)	/* DIR_INDEX not found */
#define DIR_INDEX_MISMATCH	(1<<19)	/* DIR_INDEX found but not match */
135
136 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 {
138         return container_of(back, struct data_backref, node);
139 }
140
141 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
142 {
143         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
144         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
145         struct data_backref *back1 = to_data_backref(ext1);
146         struct data_backref *back2 = to_data_backref(ext2);
147
148         WARN_ON(!ext1->is_data);
149         WARN_ON(!ext2->is_data);
150
151         /* parent and root are a union, so this covers both */
152         if (back1->parent > back2->parent)
153                 return 1;
154         if (back1->parent < back2->parent)
155                 return -1;
156
157         /* This is a full backref and the parents match. */
158         if (back1->node.full_backref)
159                 return 0;
160
161         if (back1->owner > back2->owner)
162                 return 1;
163         if (back1->owner < back2->owner)
164                 return -1;
165
166         if (back1->offset > back2->offset)
167                 return 1;
168         if (back1->offset < back2->offset)
169                 return -1;
170
171         if (back1->found_ref && back2->found_ref) {
172                 if (back1->disk_bytenr > back2->disk_bytenr)
173                         return 1;
174                 if (back1->disk_bytenr < back2->disk_bytenr)
175                         return -1;
176
177                 if (back1->bytes > back2->bytes)
178                         return 1;
179                 if (back1->bytes < back2->bytes)
180                         return -1;
181         }
182
183         return 0;
184 }
185
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head instead of an rb_node.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_record::orphan_extents.
 */
struct orphan_data_extent {
	struct list_head list;
	/* printed by print_orphan_data_extents() as inode/offset/disk_bytenr/disk_len */
	u64 root;
	u64 objectid;
	u64 offset;
	u64 disk_bytenr;
	u64 disk_len;
};
200
/*
 * Backref of a tree block.  compare_tree_backref() orders these by the
 * parent/root value alone.
 */
struct tree_backref {
	struct extent_backref node;
	/* parent and root share storage */
	union {
		u64 parent;
		u64 root;
	};
};
208
209 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
210 {
211         return container_of(back, struct tree_backref, node);
212 }
213
214 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
215 {
216         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
217         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
218         struct tree_backref *back1 = to_tree_backref(ext1);
219         struct tree_backref *back2 = to_tree_backref(ext2);
220
221         WARN_ON(ext1->is_data);
222         WARN_ON(ext2->is_data);
223
224         /* parent and root are a union, so this covers both */
225         if (back1->parent > back2->parent)
226                 return 1;
227         if (back1->parent < back2->parent)
228                 return -1;
229
230         return 0;
231 }
232
233 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
234 {
235         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
236         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
237
238         if (ext1->is_data > ext2->is_data)
239                 return 1;
240
241         if (ext1->is_data < ext2->is_data)
242                 return -1;
243
244         if (ext1->full_backref > ext2->full_backref)
245                 return 1;
246         if (ext1->full_backref < ext2->full_backref)
247                 return -1;
248
249         if (ext1->is_data)
250                 return compare_data_backref(node1, node2);
251         else
252                 return compare_tree_backref(node1, node2);
253 }
254
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * The value 2 does not fit in one bit, which is why that field is two bits
 * wide.
 */
enum { FLAG_UNSET = 2 };
257
/*
 * Per-extent bookkeeping record.  to_extent_record() recovers the record
 * from its "list" member.
 */
struct extent_record {
	struct list_head backrefs;
	struct list_head dups;
	/* presumably ordered by compare_extent_backref() - confirm at the insert site */
	struct rb_root backref_tree;
	struct list_head list;
	struct cache_extent cache;
	struct btrfs_disk_key parent_key;
	u64 start;
	u64 max_size;
	u64 nr;
	u64 refs;
	u64 extent_item_refs;
	u64 generation;
	u64 parent_generation;
	u64 info_objectid;
	u32 num_duplicates;
	u8 info_level;
	/* two bits wide so that it can also hold FLAG_UNSET (2) */
	unsigned int flag_block_full_backref:2;
	unsigned int found_rec:1;
	unsigned int content_checked:1;
	unsigned int owner_ref_checked:1;
	unsigned int is_root:1;
	unsigned int metadata:1;
	unsigned int bad_full_backref:1;
	unsigned int crossing_stripes:1;
	unsigned int wrong_chunk_type:1;
};
285
286 static inline struct extent_record* to_extent_record(struct list_head *entry)
287 {
288         return container_of(entry, struct extent_record, list);
289 }
290
291 struct inode_backref {
292         struct list_head list;
293         unsigned int found_dir_item:1;
294         unsigned int found_dir_index:1;
295         unsigned int found_inode_ref:1;
296         u8 filetype;
297         u8 ref_type;
298         int errors;
299         u64 dir;
300         u64 index;
301         u16 namelen;
302         char name[0];
303 };
304
305 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
306 {
307         return list_entry(entry, struct inode_backref, list);
308 }
309
/* List-linked snapshot of selected values describing one root item. */
struct root_item_record {
	struct list_head list;
	u64 objectid;
	u64 bytenr;
	u64 last_snapshot;
	u8 level;
	u8 drop_level;
	struct btrfs_key drop_key;
};
319
/* Error bits of a backref; decoded into text by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
333
/*
 * One hole in a file, covering [start, start + len).  Holes are kept in an
 * rb-tree ordered by compare_hole().
 */
struct file_extent_hole {
	struct rb_node node;
	u64 start;
	u64 len;
};
339
/*
 * Everything collected about one inode.  clone_inode_rec() makes a deep
 * copy, including the backrefs, orphan_extents and holes containers.
 */
struct inode_record {
	/* list of struct inode_backref */
	struct list_head backrefs;
	unsigned int checked:1;
	unsigned int merging:1;
	unsigned int found_inode_item:1;
	unsigned int found_dir_item:1;
	unsigned int found_file_extent:1;
	unsigned int found_csum_item:1;
	unsigned int some_csum_missing:1;
	unsigned int nodatasum:1;
	/* bitmask of I_ERR_*, decoded by print_inode_error() */
	int errors;

	u64 ino;
	u32 nlink;
	u32 imode;
	u64 isize;
	u64 nbytes;

	u32 found_link;
	u64 found_size;
	u64 extent_start;
	u64 extent_end;
	/* rb-tree of struct file_extent_hole */
	struct rb_root holes;
	/* list of struct orphan_data_extent */
	struct list_head orphan_extents;

	/* set to 1 on a fresh clone by clone_inode_rec() */
	u32 refs;
};
367
/* Error bits of inode_record::errors; decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
383
384 struct root_backref {
385         struct list_head list;
386         unsigned int found_dir_item:1;
387         unsigned int found_dir_index:1;
388         unsigned int found_back_ref:1;
389         unsigned int found_forward_ref:1;
390         unsigned int reachable:1;
391         int errors;
392         u64 ref_root;
393         u64 dir;
394         u64 index;
395         u16 namelen;
396         char name[0];
397 };
398
399 static inline struct root_backref* to_root_backref(struct list_head *entry)
400 {
401         return list_entry(entry, struct root_backref, list);
402 }
403
/* Per-root record, kept in a cache tree through its "cache" member. */
struct root_record {
	/* list of struct root_backref */
	struct list_head backrefs;
	struct cache_extent cache;
	unsigned int found_root_item:1;
	u64 objectid;
	u32 found_ref;
};
411
/* Cache-tree entry that carries an untyped payload pointer. */
struct ptr_node {
	struct cache_extent cache;
	void *data;
};
416
/*
 * Cache-tree entry holding its own root and inode cache trees together
 * with a pointer to the inode record currently being worked on.
 */
struct shared_node {
	struct cache_extent cache;
	struct cache_tree root_cache;
	struct cache_tree inode_cache;
	struct inode_record *current;
	u32 refs;
};
424
/* A block described by its start and size. */
struct block_info {
	u64 start;
	u32 size;
};
429
/* Tree walk state with one shared_node slot per possible tree level. */
struct walk_control {
	struct cache_tree shared;
	struct shared_node *nodes[BTRFS_MAX_LEVEL];
	int active_node;
	int root_level;
};
436
/* List entry naming one item by its key and the id of its root. */
struct bad_item {
	struct btrfs_key key;
	u64 root_id;
	struct list_head list;
};
442
/* List entry describing one (bytenr, bytes) range with a count and a broken flag. */
struct extent_entry {
	u64 bytenr;
	u64 bytes;
	int count;
	int broken;
	struct list_head list;
};
450
/* Cache-tree entry with what is known about the top node of one root. */
struct root_item_info {
	/* level of the root */
	u8 level;
	/* number of nodes at this level, must be 1 for a root */
	int node_count;
	u64 bytenr;
	u64 gen;
	struct cache_extent cache_extent;
};
460
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally for error classification.  Each flag still needs a bit
 * of its own so that two different errors cannot be mistaken for each
 * other: CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with
 * REFERENCER_MISMATCH and now has the first free bit instead.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
477
478 static void *print_status_check(void *p)
479 {
480         struct task_ctx *priv = p;
481         const char work_indicator[] = { '.', 'o', 'O', 'o' };
482         uint32_t count = 0;
483         static char *task_position_string[] = {
484                 "checking extents",
485                 "checking free space cache",
486                 "checking fs roots",
487         };
488
489         task_period_start(priv->info, 1000 /* 1s */);
490
491         if (priv->tp == TASK_NOTHING)
492                 return NULL;
493
494         while (1) {
495                 printf("%s [%c]\r", task_position_string[priv->tp],
496                                 work_indicator[count % 4]);
497                 count++;
498                 fflush(stdout);
499                 task_period_wait(priv->info);
500         }
501         return NULL;
502 }
503
/*
 * Terminate the progress line with a newline and flush stdout.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	(void)p;

	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
511
512 static enum btrfs_check_mode parse_check_mode(const char *str)
513 {
514         if (strcmp(str, "lowmem") == 0)
515                 return CHECK_MODE_LOWMEM;
516         if (strcmp(str, "orig") == 0)
517                 return CHECK_MODE_ORIGINAL;
518         if (strcmp(str, "original") == 0)
519                 return CHECK_MODE_ORIGINAL;
520
521         return CHECK_MODE_UNKNOWN;
522 }
523
524 /* Compatible function to allow reuse of old codes */
525 static u64 first_extent_gap(struct rb_root *holes)
526 {
527         struct file_extent_hole *hole;
528
529         if (RB_EMPTY_ROOT(holes))
530                 return (u64)-1;
531
532         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
533         return hole->start;
534 }
535
536 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
537 {
538         struct file_extent_hole *hole1;
539         struct file_extent_hole *hole2;
540
541         hole1 = rb_entry(node1, struct file_extent_hole, node);
542         hole2 = rb_entry(node2, struct file_extent_hole, node);
543
544         if (hole1->start > hole2->start)
545                 return -1;
546         if (hole1->start < hole2->start)
547                 return 1;
548         /* Now hole1->start == hole2->start */
549         if (hole1->len >= hole2->len)
550                 /*
551                  * Hole 1 will be merge center
552                  * Same hole will be merged later
553                  */
554                 return -1;
555         /* Hole 2 will be merge center */
556         return 1;
557 }
558
559 /*
560  * Add a hole to the record
561  *
562  * This will do hole merge for copy_file_extent_holes(),
563  * which will ensure there won't be continuous holes.
564  */
565 static int add_file_extent_hole(struct rb_root *holes,
566                                 u64 start, u64 len)
567 {
568         struct file_extent_hole *hole;
569         struct file_extent_hole *prev = NULL;
570         struct file_extent_hole *next = NULL;
571
572         hole = malloc(sizeof(*hole));
573         if (!hole)
574                 return -ENOMEM;
575         hole->start = start;
576         hole->len = len;
577         /* Since compare will not return 0, no -EEXIST will happen */
578         rb_insert(holes, &hole->node, compare_hole);
579
580         /* simple merge with previous hole */
581         if (rb_prev(&hole->node))
582                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
583                                 node);
584         if (prev && prev->start + prev->len >= hole->start) {
585                 hole->len = hole->start + hole->len - prev->start;
586                 hole->start = prev->start;
587                 rb_erase(&prev->node, holes);
588                 free(prev);
589                 prev = NULL;
590         }
591
592         /* iterate merge with next holes */
593         while (1) {
594                 if (!rb_next(&hole->node))
595                         break;
596                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
597                                         node);
598                 if (hole->start + hole->len >= next->start) {
599                         if (hole->start + hole->len <= next->start + next->len)
600                                 hole->len = next->start + next->len -
601                                             hole->start;
602                         rb_erase(&next->node, holes);
603                         free(next);
604                         next = NULL;
605                 } else
606                         break;
607         }
608         return 0;
609 }
610
611 static int compare_hole_range(struct rb_node *node, void *data)
612 {
613         struct file_extent_hole *hole;
614         u64 start;
615
616         hole = (struct file_extent_hole *)data;
617         start = hole->start;
618
619         hole = rb_entry(node, struct file_extent_hole, node);
620         if (start < hole->start)
621                 return -1;
622         if (start >= hole->start && start < hole->start + hole->len)
623                 return 0;
624         return 1;
625 }
626
627 /*
628  * Delete a hole in the record
629  *
630  * This will do the hole split and is much restrict than add.
631  */
632 static int del_file_extent_hole(struct rb_root *holes,
633                                 u64 start, u64 len)
634 {
635         struct file_extent_hole *hole;
636         struct file_extent_hole tmp;
637         u64 prev_start = 0;
638         u64 prev_len = 0;
639         u64 next_start = 0;
640         u64 next_len = 0;
641         struct rb_node *node;
642         int have_prev = 0;
643         int have_next = 0;
644         int ret = 0;
645
646         tmp.start = start;
647         tmp.len = len;
648         node = rb_search(holes, &tmp, compare_hole_range, NULL);
649         if (!node)
650                 return -EEXIST;
651         hole = rb_entry(node, struct file_extent_hole, node);
652         if (start + len > hole->start + hole->len)
653                 return -EEXIST;
654
655         /*
656          * Now there will be no overlap, delete the hole and re-add the
657          * split(s) if they exists.
658          */
659         if (start > hole->start) {
660                 prev_start = hole->start;
661                 prev_len = start - hole->start;
662                 have_prev = 1;
663         }
664         if (hole->start + hole->len > start + len) {
665                 next_start = start + len;
666                 next_len = hole->start + hole->len - start - len;
667                 have_next = 1;
668         }
669         rb_erase(node, holes);
670         free(hole);
671         if (have_prev) {
672                 ret = add_file_extent_hole(holes, prev_start, prev_len);
673                 if (ret < 0)
674                         return ret;
675         }
676         if (have_next) {
677                 ret = add_file_extent_hole(holes, next_start, next_len);
678                 if (ret < 0)
679                         return ret;
680         }
681         return 0;
682 }
683
684 static int copy_file_extent_holes(struct rb_root *dst,
685                                   struct rb_root *src)
686 {
687         struct file_extent_hole *hole;
688         struct rb_node *node;
689         int ret = 0;
690
691         node = rb_first(src);
692         while (node) {
693                 hole = rb_entry(node, struct file_extent_hole, node);
694                 ret = add_file_extent_hole(dst, hole->start, hole->len);
695                 if (ret)
696                         break;
697                 node = rb_next(node);
698         }
699         return ret;
700 }
701
702 static void free_file_extent_holes(struct rb_root *holes)
703 {
704         struct rb_node *node;
705         struct file_extent_hole *hole;
706
707         node = rb_first(holes);
708         while (node) {
709                 hole = rb_entry(node, struct file_extent_hole, node);
710                 rb_erase(node, holes);
711                 free(hole);
712                 node = rb_first(holes);
713         }
714 }
715
716 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
717
718 static void record_root_in_trans(struct btrfs_trans_handle *trans,
719                                  struct btrfs_root *root)
720 {
721         if (root->last_trans != trans->transid) {
722                 root->track_dirty = 1;
723                 root->last_trans = trans->transid;
724                 root->commit_root = root->node;
725                 extent_buffer_get(root->node);
726         }
727 }
728
729 static u8 imode_to_type(u32 imode)
730 {
731 #define S_SHIFT 12
732         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
733                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
734                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
735                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
736                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
737                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
738                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
739                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
740         };
741
742         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
743 #undef S_SHIFT
744 }
745
746 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
747 {
748         struct device_record *rec1;
749         struct device_record *rec2;
750
751         rec1 = rb_entry(node1, struct device_record, node);
752         rec2 = rb_entry(node2, struct device_record, node);
753         if (rec1->devid > rec2->devid)
754                 return -1;
755         else if (rec1->devid < rec2->devid)
756                 return 1;
757         else
758                 return 0;
759 }
760
761 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
762 {
763         struct inode_record *rec;
764         struct inode_backref *backref;
765         struct inode_backref *orig;
766         struct inode_backref *tmp;
767         struct orphan_data_extent *src_orphan;
768         struct orphan_data_extent *dst_orphan;
769         struct rb_node *rb;
770         size_t size;
771         int ret;
772
773         rec = malloc(sizeof(*rec));
774         if (!rec)
775                 return ERR_PTR(-ENOMEM);
776         memcpy(rec, orig_rec, sizeof(*rec));
777         rec->refs = 1;
778         INIT_LIST_HEAD(&rec->backrefs);
779         INIT_LIST_HEAD(&rec->orphan_extents);
780         rec->holes = RB_ROOT;
781
782         list_for_each_entry(orig, &orig_rec->backrefs, list) {
783                 size = sizeof(*orig) + orig->namelen + 1;
784                 backref = malloc(size);
785                 if (!backref) {
786                         ret = -ENOMEM;
787                         goto cleanup;
788                 }
789                 memcpy(backref, orig, size);
790                 list_add_tail(&backref->list, &rec->backrefs);
791         }
792         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
793                 dst_orphan = malloc(sizeof(*dst_orphan));
794                 if (!dst_orphan) {
795                         ret = -ENOMEM;
796                         goto cleanup;
797                 }
798                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
799                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
800         }
801         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
802         if (ret < 0)
803                 goto cleanup_rb;
804
805         return rec;
806
807 cleanup_rb:
808         rb = rb_first(&rec->holes);
809         while (rb) {
810                 struct file_extent_hole *hole;
811
812                 hole = rb_entry(rb, struct file_extent_hole, node);
813                 rb = rb_next(rb);
814                 free(hole);
815         }
816
817 cleanup:
818         if (!list_empty(&rec->backrefs))
819                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
820                         list_del(&orig->list);
821                         free(orig);
822                 }
823
824         if (!list_empty(&rec->orphan_extents))
825                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
826                         list_del(&orig->list);
827                         free(orig);
828                 }
829
830         free(rec);
831
832         return ERR_PTR(ret);
833 }
834
835 static void print_orphan_data_extents(struct list_head *orphan_extents,
836                                       u64 objectid)
837 {
838         struct orphan_data_extent *orphan;
839
840         if (list_empty(orphan_extents))
841                 return;
842         printf("The following data extent is lost in tree %llu:\n",
843                objectid);
844         list_for_each_entry(orphan, orphan_extents, list) {
845                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
846                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
847                        orphan->disk_len);
848         }
849 }
850
851 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
852 {
853         u64 root_objectid = root->root_key.objectid;
854         int errors = rec->errors;
855
856         if (!errors)
857                 return;
858         /* reloc root errors, we print its corresponding fs root objectid*/
859         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
860                 root_objectid = root->root_key.offset;
861                 fprintf(stderr, "reloc");
862         }
863         fprintf(stderr, "root %llu inode %llu errors %x",
864                 (unsigned long long) root_objectid,
865                 (unsigned long long) rec->ino, rec->errors);
866
867         if (errors & I_ERR_NO_INODE_ITEM)
868                 fprintf(stderr, ", no inode item");
869         if (errors & I_ERR_NO_ORPHAN_ITEM)
870                 fprintf(stderr, ", no orphan item");
871         if (errors & I_ERR_DUP_INODE_ITEM)
872                 fprintf(stderr, ", dup inode item");
873         if (errors & I_ERR_DUP_DIR_INDEX)
874                 fprintf(stderr, ", dup dir index");
875         if (errors & I_ERR_ODD_DIR_ITEM)
876                 fprintf(stderr, ", odd dir item");
877         if (errors & I_ERR_ODD_FILE_EXTENT)
878                 fprintf(stderr, ", odd file extent");
879         if (errors & I_ERR_BAD_FILE_EXTENT)
880                 fprintf(stderr, ", bad file extent");
881         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
882                 fprintf(stderr, ", file extent overlap");
883         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
884                 fprintf(stderr, ", file extent discount");
885         if (errors & I_ERR_DIR_ISIZE_WRONG)
886                 fprintf(stderr, ", dir isize wrong");
887         if (errors & I_ERR_FILE_NBYTES_WRONG)
888                 fprintf(stderr, ", nbytes wrong");
889         if (errors & I_ERR_ODD_CSUM_ITEM)
890                 fprintf(stderr, ", odd csum item");
891         if (errors & I_ERR_SOME_CSUM_MISSING)
892                 fprintf(stderr, ", some csum missing");
893         if (errors & I_ERR_LINK_COUNT_WRONG)
894                 fprintf(stderr, ", link count wrong");
895         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
896                 fprintf(stderr, ", orphan file extent");
897         fprintf(stderr, "\n");
898         /* Print the orphan extents if needed */
899         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
900                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
901
902         /* Print the holes if needed */
903         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
904                 struct file_extent_hole *hole;
905                 struct rb_node *node;
906                 int found = 0;
907
908                 node = rb_first(&rec->holes);
909                 fprintf(stderr, "Found file extent holes:\n");
910                 while (node) {
911                         found = 1;
912                         hole = rb_entry(node, struct file_extent_hole, node);
913                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
914                                 hole->start, hole->len);
915                         node = rb_next(node);
916                 }
917                 if (!found)
918                         fprintf(stderr, "\tstart: 0, len: %llu\n",
919                                 round_up(rec->isize,
920                                          root->fs_info->sectorsize));
921         }
922 }
923
924 static void print_ref_error(int errors)
925 {
926         if (errors & REF_ERR_NO_DIR_ITEM)
927                 fprintf(stderr, ", no dir item");
928         if (errors & REF_ERR_NO_DIR_INDEX)
929                 fprintf(stderr, ", no dir index");
930         if (errors & REF_ERR_NO_INODE_REF)
931                 fprintf(stderr, ", no inode ref");
932         if (errors & REF_ERR_DUP_DIR_ITEM)
933                 fprintf(stderr, ", dup dir item");
934         if (errors & REF_ERR_DUP_DIR_INDEX)
935                 fprintf(stderr, ", dup dir index");
936         if (errors & REF_ERR_DUP_INODE_REF)
937                 fprintf(stderr, ", dup inode ref");
938         if (errors & REF_ERR_INDEX_UNMATCH)
939                 fprintf(stderr, ", index mismatch");
940         if (errors & REF_ERR_FILETYPE_UNMATCH)
941                 fprintf(stderr, ", filetype mismatch");
942         if (errors & REF_ERR_NAME_TOO_LONG)
943                 fprintf(stderr, ", name too long");
944         if (errors & REF_ERR_NO_ROOT_REF)
945                 fprintf(stderr, ", no root ref");
946         if (errors & REF_ERR_NO_ROOT_BACKREF)
947                 fprintf(stderr, ", no root backref");
948         if (errors & REF_ERR_DUP_ROOT_REF)
949                 fprintf(stderr, ", dup root ref");
950         if (errors & REF_ERR_DUP_ROOT_BACKREF)
951                 fprintf(stderr, ", dup root backref");
952         fprintf(stderr, "\n");
953 }
954
955 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
956                                           u64 ino, int mod)
957 {
958         struct ptr_node *node;
959         struct cache_extent *cache;
960         struct inode_record *rec = NULL;
961         int ret;
962
963         cache = lookup_cache_extent(inode_cache, ino, 1);
964         if (cache) {
965                 node = container_of(cache, struct ptr_node, cache);
966                 rec = node->data;
967                 if (mod && rec->refs > 1) {
968                         node->data = clone_inode_rec(rec);
969                         if (IS_ERR(node->data))
970                                 return node->data;
971                         rec->refs--;
972                         rec = node->data;
973                 }
974         } else if (mod) {
975                 rec = calloc(1, sizeof(*rec));
976                 if (!rec)
977                         return ERR_PTR(-ENOMEM);
978                 rec->ino = ino;
979                 rec->extent_start = (u64)-1;
980                 rec->refs = 1;
981                 INIT_LIST_HEAD(&rec->backrefs);
982                 INIT_LIST_HEAD(&rec->orphan_extents);
983                 rec->holes = RB_ROOT;
984
985                 node = malloc(sizeof(*node));
986                 if (!node) {
987                         free(rec);
988                         return ERR_PTR(-ENOMEM);
989                 }
990                 node->cache.start = ino;
991                 node->cache.size = 1;
992                 node->data = rec;
993
994                 if (ino == BTRFS_FREE_INO_OBJECTID)
995                         rec->found_link = 1;
996
997                 ret = insert_cache_extent(inode_cache, &node->cache);
998                 if (ret)
999                         return ERR_PTR(-EEXIST);
1000         }
1001         return rec;
1002 }
1003
1004 static void free_orphan_data_extents(struct list_head *orphan_extents)
1005 {
1006         struct orphan_data_extent *orphan;
1007
1008         while (!list_empty(orphan_extents)) {
1009                 orphan = list_entry(orphan_extents->next,
1010                                     struct orphan_data_extent, list);
1011                 list_del(&orphan->list);
1012                 free(orphan);
1013         }
1014 }
1015
1016 static void free_inode_rec(struct inode_record *rec)
1017 {
1018         struct inode_backref *backref;
1019
1020         if (--rec->refs > 0)
1021                 return;
1022
1023         while (!list_empty(&rec->backrefs)) {
1024                 backref = to_inode_backref(rec->backrefs.next);
1025                 list_del(&backref->list);
1026                 free(backref);
1027         }
1028         free_orphan_data_extents(&rec->orphan_extents);
1029         free_file_extent_holes(&rec->holes);
1030         free(rec);
1031 }
1032
1033 static int can_free_inode_rec(struct inode_record *rec)
1034 {
1035         if (!rec->errors && rec->checked && rec->found_inode_item &&
1036             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1037                 return 1;
1038         return 0;
1039 }
1040
1041 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1042                                  struct inode_record *rec)
1043 {
1044         struct cache_extent *cache;
1045         struct inode_backref *tmp, *backref;
1046         struct ptr_node *node;
1047         u8 filetype;
1048
1049         if (!rec->found_inode_item)
1050                 return;
1051
1052         filetype = imode_to_type(rec->imode);
1053         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1054                 if (backref->found_dir_item && backref->found_dir_index) {
1055                         if (backref->filetype != filetype)
1056                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1057                         if (!backref->errors && backref->found_inode_ref &&
1058                             rec->nlink == rec->found_link) {
1059                                 list_del(&backref->list);
1060                                 free(backref);
1061                         }
1062                 }
1063         }
1064
1065         if (!rec->checked || rec->merging)
1066                 return;
1067
1068         if (S_ISDIR(rec->imode)) {
1069                 if (rec->found_size != rec->isize)
1070                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1071                 if (rec->found_file_extent)
1072                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1073         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1074                 if (rec->found_dir_item)
1075                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1076                 if (rec->found_size != rec->nbytes)
1077                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1078                 if (rec->nlink > 0 && !no_holes &&
1079                     (rec->extent_end < rec->isize ||
1080                      first_extent_gap(&rec->holes) < rec->isize))
1081                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1082         }
1083
1084         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1085                 if (rec->found_csum_item && rec->nodatasum)
1086                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1087                 if (rec->some_csum_missing && !rec->nodatasum)
1088                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1089         }
1090
1091         BUG_ON(rec->refs != 1);
1092         if (can_free_inode_rec(rec)) {
1093                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1094                 node = container_of(cache, struct ptr_node, cache);
1095                 BUG_ON(node->data != rec);
1096                 remove_cache_extent(inode_cache, &node->cache);
1097                 free(node);
1098                 free_inode_rec(rec);
1099         }
1100 }
1101
1102 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1103 {
1104         struct btrfs_path path;
1105         struct btrfs_key key;
1106         int ret;
1107
1108         key.objectid = BTRFS_ORPHAN_OBJECTID;
1109         key.type = BTRFS_ORPHAN_ITEM_KEY;
1110         key.offset = ino;
1111
1112         btrfs_init_path(&path);
1113         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1114         btrfs_release_path(&path);
1115         if (ret > 0)
1116                 ret = -ENOENT;
1117         return ret;
1118 }
1119
1120 static int process_inode_item(struct extent_buffer *eb,
1121                               int slot, struct btrfs_key *key,
1122                               struct shared_node *active_node)
1123 {
1124         struct inode_record *rec;
1125         struct btrfs_inode_item *item;
1126
1127         rec = active_node->current;
1128         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1129         if (rec->found_inode_item) {
1130                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1131                 return 1;
1132         }
1133         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1134         rec->nlink = btrfs_inode_nlink(eb, item);
1135         rec->isize = btrfs_inode_size(eb, item);
1136         rec->nbytes = btrfs_inode_nbytes(eb, item);
1137         rec->imode = btrfs_inode_mode(eb, item);
1138         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1139                 rec->nodatasum = 1;
1140         rec->found_inode_item = 1;
1141         if (rec->nlink == 0)
1142                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1143         maybe_free_inode_rec(&active_node->inode_cache, rec);
1144         return 0;
1145 }
1146
1147 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1148                                                 const char *name,
1149                                                 int namelen, u64 dir)
1150 {
1151         struct inode_backref *backref;
1152
1153         list_for_each_entry(backref, &rec->backrefs, list) {
1154                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1155                         break;
1156                 if (backref->dir != dir || backref->namelen != namelen)
1157                         continue;
1158                 if (memcmp(name, backref->name, namelen))
1159                         continue;
1160                 return backref;
1161         }
1162
1163         backref = malloc(sizeof(*backref) + namelen + 1);
1164         if (!backref)
1165                 return NULL;
1166         memset(backref, 0, sizeof(*backref));
1167         backref->dir = dir;
1168         backref->namelen = namelen;
1169         memcpy(backref->name, name, namelen);
1170         backref->name[namelen] = '\0';
1171         list_add_tail(&backref->list, &rec->backrefs);
1172         return backref;
1173 }
1174
1175 static int add_inode_backref(struct cache_tree *inode_cache,
1176                              u64 ino, u64 dir, u64 index,
1177                              const char *name, int namelen,
1178                              u8 filetype, u8 itemtype, int errors)
1179 {
1180         struct inode_record *rec;
1181         struct inode_backref *backref;
1182
1183         rec = get_inode_rec(inode_cache, ino, 1);
1184         BUG_ON(IS_ERR(rec));
1185         backref = get_inode_backref(rec, name, namelen, dir);
1186         BUG_ON(!backref);
1187         if (errors)
1188                 backref->errors |= errors;
1189         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1190                 if (backref->found_dir_index)
1191                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1192                 if (backref->found_inode_ref && backref->index != index)
1193                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1194                 if (backref->found_dir_item && backref->filetype != filetype)
1195                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1196
1197                 backref->index = index;
1198                 backref->filetype = filetype;
1199                 backref->found_dir_index = 1;
1200         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1201                 rec->found_link++;
1202                 if (backref->found_dir_item)
1203                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1204                 if (backref->found_dir_index && backref->filetype != filetype)
1205                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1206
1207                 backref->filetype = filetype;
1208                 backref->found_dir_item = 1;
1209         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1210                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1211                 if (backref->found_inode_ref)
1212                         backref->errors |= REF_ERR_DUP_INODE_REF;
1213                 if (backref->found_dir_index && backref->index != index)
1214                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1215                 else
1216                         backref->index = index;
1217
1218                 backref->ref_type = itemtype;
1219                 backref->found_inode_ref = 1;
1220         } else {
1221                 BUG_ON(1);
1222         }
1223
1224         maybe_free_inode_rec(inode_cache, rec);
1225         return 0;
1226 }
1227
1228 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1229                             struct cache_tree *dst_cache)
1230 {
1231         struct inode_backref *backref;
1232         u32 dir_count = 0;
1233         int ret = 0;
1234
1235         dst->merging = 1;
1236         list_for_each_entry(backref, &src->backrefs, list) {
1237                 if (backref->found_dir_index) {
1238                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1239                                         backref->index, backref->name,
1240                                         backref->namelen, backref->filetype,
1241                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1242                 }
1243                 if (backref->found_dir_item) {
1244                         dir_count++;
1245                         add_inode_backref(dst_cache, dst->ino,
1246                                         backref->dir, 0, backref->name,
1247                                         backref->namelen, backref->filetype,
1248                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1249                 }
1250                 if (backref->found_inode_ref) {
1251                         add_inode_backref(dst_cache, dst->ino,
1252                                         backref->dir, backref->index,
1253                                         backref->name, backref->namelen, 0,
1254                                         backref->ref_type, backref->errors);
1255                 }
1256         }
1257
1258         if (src->found_dir_item)
1259                 dst->found_dir_item = 1;
1260         if (src->found_file_extent)
1261                 dst->found_file_extent = 1;
1262         if (src->found_csum_item)
1263                 dst->found_csum_item = 1;
1264         if (src->some_csum_missing)
1265                 dst->some_csum_missing = 1;
1266         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1267                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1268                 if (ret < 0)
1269                         return ret;
1270         }
1271
1272         BUG_ON(src->found_link < dir_count);
1273         dst->found_link += src->found_link - dir_count;
1274         dst->found_size += src->found_size;
1275         if (src->extent_start != (u64)-1) {
1276                 if (dst->extent_start == (u64)-1) {
1277                         dst->extent_start = src->extent_start;
1278                         dst->extent_end = src->extent_end;
1279                 } else {
1280                         if (dst->extent_end > src->extent_start)
1281                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1282                         else if (dst->extent_end < src->extent_start) {
1283                                 ret = add_file_extent_hole(&dst->holes,
1284                                         dst->extent_end,
1285                                         src->extent_start - dst->extent_end);
1286                         }
1287                         if (dst->extent_end < src->extent_end)
1288                                 dst->extent_end = src->extent_end;
1289                 }
1290         }
1291
1292         dst->errors |= src->errors;
1293         if (src->found_inode_item) {
1294                 if (!dst->found_inode_item) {
1295                         dst->nlink = src->nlink;
1296                         dst->isize = src->isize;
1297                         dst->nbytes = src->nbytes;
1298                         dst->imode = src->imode;
1299                         dst->nodatasum = src->nodatasum;
1300                         dst->found_inode_item = 1;
1301                 } else {
1302                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1303                 }
1304         }
1305         dst->merging = 0;
1306
1307         return 0;
1308 }
1309
1310 static int splice_shared_node(struct shared_node *src_node,
1311                               struct shared_node *dst_node)
1312 {
1313         struct cache_extent *cache;
1314         struct ptr_node *node, *ins;
1315         struct cache_tree *src, *dst;
1316         struct inode_record *rec, *conflict;
1317         u64 current_ino = 0;
1318         int splice = 0;
1319         int ret;
1320
1321         if (--src_node->refs == 0)
1322                 splice = 1;
1323         if (src_node->current)
1324                 current_ino = src_node->current->ino;
1325
1326         src = &src_node->root_cache;
1327         dst = &dst_node->root_cache;
1328 again:
1329         cache = search_cache_extent(src, 0);
1330         while (cache) {
1331                 node = container_of(cache, struct ptr_node, cache);
1332                 rec = node->data;
1333                 cache = next_cache_extent(cache);
1334
1335                 if (splice) {
1336                         remove_cache_extent(src, &node->cache);
1337                         ins = node;
1338                 } else {
1339                         ins = malloc(sizeof(*ins));
1340                         BUG_ON(!ins);
1341                         ins->cache.start = node->cache.start;
1342                         ins->cache.size = node->cache.size;
1343                         ins->data = rec;
1344                         rec->refs++;
1345                 }
1346                 ret = insert_cache_extent(dst, &ins->cache);
1347                 if (ret == -EEXIST) {
1348                         conflict = get_inode_rec(dst, rec->ino, 1);
1349                         BUG_ON(IS_ERR(conflict));
1350                         merge_inode_recs(rec, conflict, dst);
1351                         if (rec->checked) {
1352                                 conflict->checked = 1;
1353                                 if (dst_node->current == conflict)
1354                                         dst_node->current = NULL;
1355                         }
1356                         maybe_free_inode_rec(dst, conflict);
1357                         free_inode_rec(rec);
1358                         free(ins);
1359                 } else {
1360                         BUG_ON(ret);
1361                 }
1362         }
1363
1364         if (src == &src_node->root_cache) {
1365                 src = &src_node->inode_cache;
1366                 dst = &dst_node->inode_cache;
1367                 goto again;
1368         }
1369
1370         if (current_ino > 0 && (!dst_node->current ||
1371             current_ino > dst_node->current->ino)) {
1372                 if (dst_node->current) {
1373                         dst_node->current->checked = 1;
1374                         maybe_free_inode_rec(dst, dst_node->current);
1375                 }
1376                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1377                 BUG_ON(IS_ERR(dst_node->current));
1378         }
1379         return 0;
1380 }
1381
1382 static void free_inode_ptr(struct cache_extent *cache)
1383 {
1384         struct ptr_node *node;
1385         struct inode_record *rec;
1386
1387         node = container_of(cache, struct ptr_node, cache);
1388         rec = node->data;
1389         free_inode_rec(rec);
1390         free(node);
1391 }
1392
1393 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1394
1395 static struct shared_node *find_shared_node(struct cache_tree *shared,
1396                                             u64 bytenr)
1397 {
1398         struct cache_extent *cache;
1399         struct shared_node *node;
1400
1401         cache = lookup_cache_extent(shared, bytenr, 1);
1402         if (cache) {
1403                 node = container_of(cache, struct shared_node, cache);
1404                 return node;
1405         }
1406         return NULL;
1407 }
1408
1409 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1410 {
1411         int ret;
1412         struct shared_node *node;
1413
1414         node = calloc(1, sizeof(*node));
1415         if (!node)
1416                 return -ENOMEM;
1417         node->cache.start = bytenr;
1418         node->cache.size = 1;
1419         cache_tree_init(&node->root_cache);
1420         cache_tree_init(&node->inode_cache);
1421         node->refs = refs;
1422
1423         ret = insert_cache_extent(shared, &node->cache);
1424
1425         return ret;
1426 }
1427
1428 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1429                              struct walk_control *wc, int level)
1430 {
1431         struct shared_node *node;
1432         struct shared_node *dest;
1433         int ret;
1434
1435         if (level == wc->active_node)
1436                 return 0;
1437
1438         BUG_ON(wc->active_node <= level);
1439         node = find_shared_node(&wc->shared, bytenr);
1440         if (!node) {
1441                 ret = add_shared_node(&wc->shared, bytenr, refs);
1442                 BUG_ON(ret);
1443                 node = find_shared_node(&wc->shared, bytenr);
1444                 wc->nodes[level] = node;
1445                 wc->active_node = level;
1446                 return 0;
1447         }
1448
1449         if (wc->root_level == wc->active_node &&
1450             btrfs_root_refs(&root->root_item) == 0) {
1451                 if (--node->refs == 0) {
1452                         free_inode_recs_tree(&node->root_cache);
1453                         free_inode_recs_tree(&node->inode_cache);
1454                         remove_cache_extent(&wc->shared, &node->cache);
1455                         free(node);
1456                 }
1457                 return 1;
1458         }
1459
1460         dest = wc->nodes[wc->active_node];
1461         splice_shared_node(node, dest);
1462         if (node->refs == 0) {
1463                 remove_cache_extent(&wc->shared, &node->cache);
1464                 free(node);
1465         }
1466         return 1;
1467 }
1468
1469 static int leave_shared_node(struct btrfs_root *root,
1470                              struct walk_control *wc, int level)
1471 {
1472         struct shared_node *node;
1473         struct shared_node *dest;
1474         int i;
1475
1476         if (level == wc->root_level)
1477                 return 0;
1478
1479         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1480                 if (wc->nodes[i])
1481                         break;
1482         }
1483         BUG_ON(i >= BTRFS_MAX_LEVEL);
1484
1485         node = wc->nodes[wc->active_node];
1486         wc->nodes[wc->active_node] = NULL;
1487         wc->active_node = i;
1488
1489         dest = wc->nodes[wc->active_node];
1490         if (wc->active_node < wc->root_level ||
1491             btrfs_root_refs(&root->root_item) > 0) {
1492                 BUG_ON(node->refs <= 1);
1493                 splice_shared_node(node, dest);
1494         } else {
1495                 BUG_ON(node->refs < 2);
1496                 node->refs--;
1497         }
1498         return 0;
1499 }
1500
1501 /*
1502  * Returns:
1503  * < 0 - on error
1504  * 1   - if the root with id child_root_id is a child of root parent_root_id
1505  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1506  *       has other root(s) as parent(s)
1507  * 2   - if the root child_root_id doesn't have any parent roots
1508  */
1509 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1510                          u64 child_root_id)
1511 {
1512         struct btrfs_path path;
1513         struct btrfs_key key;
1514         struct extent_buffer *leaf;
1515         int has_parent = 0;
1516         int ret;
1517
1518         btrfs_init_path(&path);
1519
1520         key.objectid = parent_root_id;
1521         key.type = BTRFS_ROOT_REF_KEY;
1522         key.offset = child_root_id;
1523         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1524                                 0, 0);
1525         if (ret < 0)
1526                 return ret;
1527         btrfs_release_path(&path);
1528         if (!ret)
1529                 return 1;
1530
1531         key.objectid = child_root_id;
1532         key.type = BTRFS_ROOT_BACKREF_KEY;
1533         key.offset = 0;
1534         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1535                                 0, 0);
1536         if (ret < 0)
1537                 goto out;
1538
1539         while (1) {
1540                 leaf = path.nodes[0];
1541                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1542                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1543                         if (ret)
1544                                 break;
1545                         leaf = path.nodes[0];
1546                 }
1547
1548                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1549                 if (key.objectid != child_root_id ||
1550                     key.type != BTRFS_ROOT_BACKREF_KEY)
1551                         break;
1552
1553                 has_parent = 1;
1554
1555                 if (key.offset == parent_root_id) {
1556                         btrfs_release_path(&path);
1557                         return 1;
1558                 }
1559
1560                 path.slots[0]++;
1561         }
1562 out:
1563         btrfs_release_path(&path);
1564         if (ret < 0)
1565                 return ret;
1566         return has_parent ? 0 : 2;
1567 }
1568
1569 static int process_dir_item(struct extent_buffer *eb,
1570                             int slot, struct btrfs_key *key,
1571                             struct shared_node *active_node)
1572 {
1573         u32 total;
1574         u32 cur = 0;
1575         u32 len;
1576         u32 name_len;
1577         u32 data_len;
1578         int error;
1579         int nritems = 0;
1580         u8 filetype;
1581         struct btrfs_dir_item *di;
1582         struct inode_record *rec;
1583         struct cache_tree *root_cache;
1584         struct cache_tree *inode_cache;
1585         struct btrfs_key location;
1586         char namebuf[BTRFS_NAME_LEN];
1587
1588         root_cache = &active_node->root_cache;
1589         inode_cache = &active_node->inode_cache;
1590         rec = active_node->current;
1591         rec->found_dir_item = 1;
1592
1593         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1594         total = btrfs_item_size_nr(eb, slot);
1595         while (cur < total) {
1596                 nritems++;
1597                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1598                 name_len = btrfs_dir_name_len(eb, di);
1599                 data_len = btrfs_dir_data_len(eb, di);
1600                 filetype = btrfs_dir_type(eb, di);
1601
1602                 rec->found_size += name_len;
1603                 if (cur + sizeof(*di) + name_len > total ||
1604                     name_len > BTRFS_NAME_LEN) {
1605                         error = REF_ERR_NAME_TOO_LONG;
1606
1607                         if (cur + sizeof(*di) > total)
1608                                 break;
1609                         len = min_t(u32, total - cur - sizeof(*di),
1610                                     BTRFS_NAME_LEN);
1611                 } else {
1612                         len = name_len;
1613                         error = 0;
1614                 }
1615
1616                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1617
1618                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1619                     key->offset != btrfs_name_hash(namebuf, len)) {
1620                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1621                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1622                         key->objectid, key->offset, namebuf, len, filetype,
1623                         key->offset, btrfs_name_hash(namebuf, len));
1624                 }
1625
1626                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1627                         add_inode_backref(inode_cache, location.objectid,
1628                                           key->objectid, key->offset, namebuf,
1629                                           len, filetype, key->type, error);
1630                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1631                         add_inode_backref(root_cache, location.objectid,
1632                                           key->objectid, key->offset,
1633                                           namebuf, len, filetype,
1634                                           key->type, error);
1635                 } else {
1636                         fprintf(stderr, "invalid location in dir item %u\n",
1637                                 location.type);
1638                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1639                                           key->objectid, key->offset, namebuf,
1640                                           len, filetype, key->type, error);
1641                 }
1642
1643                 len = sizeof(*di) + name_len + data_len;
1644                 di = (struct btrfs_dir_item *)((char *)di + len);
1645                 cur += len;
1646         }
1647         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1648                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1649
1650         return 0;
1651 }
1652
1653 static int process_inode_ref(struct extent_buffer *eb,
1654                              int slot, struct btrfs_key *key,
1655                              struct shared_node *active_node)
1656 {
1657         u32 total;
1658         u32 cur = 0;
1659         u32 len;
1660         u32 name_len;
1661         u64 index;
1662         int error;
1663         struct cache_tree *inode_cache;
1664         struct btrfs_inode_ref *ref;
1665         char namebuf[BTRFS_NAME_LEN];
1666
1667         inode_cache = &active_node->inode_cache;
1668
1669         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1670         total = btrfs_item_size_nr(eb, slot);
1671         while (cur < total) {
1672                 name_len = btrfs_inode_ref_name_len(eb, ref);
1673                 index = btrfs_inode_ref_index(eb, ref);
1674
1675                 /* inode_ref + namelen should not cross item boundary */
1676                 if (cur + sizeof(*ref) + name_len > total ||
1677                     name_len > BTRFS_NAME_LEN) {
1678                         if (total < cur + sizeof(*ref))
1679                                 break;
1680
1681                         /* Still try to read out the remaining part */
1682                         len = min_t(u32, total - cur - sizeof(*ref),
1683                                     BTRFS_NAME_LEN);
1684                         error = REF_ERR_NAME_TOO_LONG;
1685                 } else {
1686                         len = name_len;
1687                         error = 0;
1688                 }
1689
1690                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1691                 add_inode_backref(inode_cache, key->objectid, key->offset,
1692                                   index, namebuf, len, 0, key->type, error);
1693
1694                 len = sizeof(*ref) + name_len;
1695                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1696                 cur += len;
1697         }
1698         return 0;
1699 }
1700
1701 static int process_inode_extref(struct extent_buffer *eb,
1702                                 int slot, struct btrfs_key *key,
1703                                 struct shared_node *active_node)
1704 {
1705         u32 total;
1706         u32 cur = 0;
1707         u32 len;
1708         u32 name_len;
1709         u64 index;
1710         u64 parent;
1711         int error;
1712         struct cache_tree *inode_cache;
1713         struct btrfs_inode_extref *extref;
1714         char namebuf[BTRFS_NAME_LEN];
1715
1716         inode_cache = &active_node->inode_cache;
1717
1718         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1719         total = btrfs_item_size_nr(eb, slot);
1720         while (cur < total) {
1721                 name_len = btrfs_inode_extref_name_len(eb, extref);
1722                 index = btrfs_inode_extref_index(eb, extref);
1723                 parent = btrfs_inode_extref_parent(eb, extref);
1724                 if (name_len <= BTRFS_NAME_LEN) {
1725                         len = name_len;
1726                         error = 0;
1727                 } else {
1728                         len = BTRFS_NAME_LEN;
1729                         error = REF_ERR_NAME_TOO_LONG;
1730                 }
1731                 read_extent_buffer(eb, namebuf,
1732                                    (unsigned long)(extref + 1), len);
1733                 add_inode_backref(inode_cache, key->objectid, parent,
1734                                   index, namebuf, len, 0, key->type, error);
1735
1736                 len = sizeof(*extref) + name_len;
1737                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1738                 cur += len;
1739         }
1740         return 0;
1741
1742 }
1743
1744 static int count_csum_range(struct btrfs_root *root, u64 start,
1745                             u64 len, u64 *found)
1746 {
1747         struct btrfs_key key;
1748         struct btrfs_path path;
1749         struct extent_buffer *leaf;
1750         int ret;
1751         size_t size;
1752         *found = 0;
1753         u64 csum_end;
1754         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1755
1756         btrfs_init_path(&path);
1757
1758         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1759         key.offset = start;
1760         key.type = BTRFS_EXTENT_CSUM_KEY;
1761
1762         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1763                                 &key, &path, 0, 0);
1764         if (ret < 0)
1765                 goto out;
1766         if (ret > 0 && path.slots[0] > 0) {
1767                 leaf = path.nodes[0];
1768                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1769                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1770                     key.type == BTRFS_EXTENT_CSUM_KEY)
1771                         path.slots[0]--;
1772         }
1773
1774         while (len > 0) {
1775                 leaf = path.nodes[0];
1776                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1777                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1778                         if (ret > 0)
1779                                 break;
1780                         else if (ret < 0)
1781                                 goto out;
1782                         leaf = path.nodes[0];
1783                 }
1784
1785                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1786                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1787                     key.type != BTRFS_EXTENT_CSUM_KEY)
1788                         break;
1789
1790                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1791                 if (key.offset >= start + len)
1792                         break;
1793
1794                 if (key.offset > start)
1795                         start = key.offset;
1796
1797                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1798                 csum_end = key.offset + (size / csum_size) *
1799                            root->fs_info->sectorsize;
1800                 if (csum_end > start) {
1801                         size = min(csum_end - start, len);
1802                         len -= size;
1803                         start += size;
1804                         *found += size;
1805                 }
1806
1807                 path.slots[0]++;
1808         }
1809 out:
1810         btrfs_release_path(&path);
1811         if (ret < 0)
1812                 return ret;
1813         return 0;
1814 }
1815
1816 static int process_file_extent(struct btrfs_root *root,
1817                                 struct extent_buffer *eb,
1818                                 int slot, struct btrfs_key *key,
1819                                 struct shared_node *active_node)
1820 {
1821         struct inode_record *rec;
1822         struct btrfs_file_extent_item *fi;
1823         u64 num_bytes = 0;
1824         u64 disk_bytenr = 0;
1825         u64 extent_offset = 0;
1826         u64 mask = root->fs_info->sectorsize - 1;
1827         int extent_type;
1828         int ret;
1829
1830         rec = active_node->current;
1831         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1832         rec->found_file_extent = 1;
1833
1834         if (rec->extent_start == (u64)-1) {
1835                 rec->extent_start = key->offset;
1836                 rec->extent_end = key->offset;
1837         }
1838
1839         if (rec->extent_end > key->offset)
1840                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1841         else if (rec->extent_end < key->offset) {
1842                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1843                                            key->offset - rec->extent_end);
1844                 if (ret < 0)
1845                         return ret;
1846         }
1847
1848         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1849         extent_type = btrfs_file_extent_type(eb, fi);
1850
1851         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1852                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1853                 if (num_bytes == 0)
1854                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1855                 rec->found_size += num_bytes;
1856                 num_bytes = (num_bytes + mask) & ~mask;
1857         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1858                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1859                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1860                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1861                 extent_offset = btrfs_file_extent_offset(eb, fi);
1862                 if (num_bytes == 0 || (num_bytes & mask))
1863                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1864                 if (num_bytes + extent_offset >
1865                     btrfs_file_extent_ram_bytes(eb, fi))
1866                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1867                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1868                     (btrfs_file_extent_compression(eb, fi) ||
1869                      btrfs_file_extent_encryption(eb, fi) ||
1870                      btrfs_file_extent_other_encoding(eb, fi)))
1871                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1872                 if (disk_bytenr > 0)
1873                         rec->found_size += num_bytes;
1874         } else {
1875                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1876         }
1877         rec->extent_end = key->offset + num_bytes;
1878
1879         /*
1880          * The data reloc tree will copy full extents into its inode and then
1881          * copy the corresponding csums.  Because the extent it copied could be
1882          * a preallocated extent that hasn't been written to yet there may be no
1883          * csums to copy, ergo we won't have csums for our file extent.  This is
1884          * ok so just don't bother checking csums if the inode belongs to the
1885          * data reloc tree.
1886          */
1887         if (disk_bytenr > 0 &&
1888             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1889                 u64 found;
1890                 if (btrfs_file_extent_compression(eb, fi))
1891                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1892                 else
1893                         disk_bytenr += extent_offset;
1894
1895                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1896                 if (ret < 0)
1897                         return ret;
1898                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1899                         if (found > 0)
1900                                 rec->found_csum_item = 1;
1901                         if (found < num_bytes)
1902                                 rec->some_csum_missing = 1;
1903                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1904                         if (found > 0)
1905                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1906                 }
1907         }
1908         return 0;
1909 }
1910
1911 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1912                             struct walk_control *wc)
1913 {
1914         struct btrfs_key key;
1915         u32 nritems;
1916         int i;
1917         int ret = 0;
1918         struct cache_tree *inode_cache;
1919         struct shared_node *active_node;
1920
1921         if (wc->root_level == wc->active_node &&
1922             btrfs_root_refs(&root->root_item) == 0)
1923                 return 0;
1924
1925         active_node = wc->nodes[wc->active_node];
1926         inode_cache = &active_node->inode_cache;
1927         nritems = btrfs_header_nritems(eb);
1928         for (i = 0; i < nritems; i++) {
1929                 btrfs_item_key_to_cpu(eb, &key, i);
1930
1931                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1932                         continue;
1933                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1934                         continue;
1935
1936                 if (active_node->current == NULL ||
1937                     active_node->current->ino < key.objectid) {
1938                         if (active_node->current) {
1939                                 active_node->current->checked = 1;
1940                                 maybe_free_inode_rec(inode_cache,
1941                                                      active_node->current);
1942                         }
1943                         active_node->current = get_inode_rec(inode_cache,
1944                                                              key.objectid, 1);
1945                         BUG_ON(IS_ERR(active_node->current));
1946                 }
1947                 switch (key.type) {
1948                 case BTRFS_DIR_ITEM_KEY:
1949                 case BTRFS_DIR_INDEX_KEY:
1950                         ret = process_dir_item(eb, i, &key, active_node);
1951                         break;
1952                 case BTRFS_INODE_REF_KEY:
1953                         ret = process_inode_ref(eb, i, &key, active_node);
1954                         break;
1955                 case BTRFS_INODE_EXTREF_KEY:
1956                         ret = process_inode_extref(eb, i, &key, active_node);
1957                         break;
1958                 case BTRFS_INODE_ITEM_KEY:
1959                         ret = process_inode_item(eb, i, &key, active_node);
1960                         break;
1961                 case BTRFS_EXTENT_DATA_KEY:
1962                         ret = process_file_extent(root, eb, i, &key,
1963                                                   active_node);
1964                         break;
1965                 default:
1966                         break;
1967                 };
1968         }
1969         return ret;
1970 }
1971
/*
 * Per-level cache of extent information for the tree block most recently
 * looked up at each level; every array is indexed by tree level.
 */
struct node_refs {
        /* start bytenr of the block the cached values belong to */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count looked up for that block */
        u64 refs[BTRFS_MAX_LEVEL];
        /* non-zero if the block has to be checked while walking this root */
        int need_check[BTRFS_MAX_LEVEL];
};
1977
/*
 * Forward declarations: process_one_leaf_v2() below calls both helpers
 * before their definitions appear in this file.
 */
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
                             struct node_refs *nrefs, u64 level);
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
1982
1983 /*
1984  * Returns >0  Found error, not fatal, should continue
1985  * Returns <0  Fatal error, must exit the whole check
1986  * Returns 0   No errors found
1987  */
1988 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1989                                struct node_refs *nrefs, int *level, int ext_ref)
1990 {
1991         struct extent_buffer *cur = path->nodes[0];
1992         struct btrfs_key key;
1993         u64 cur_bytenr;
1994         u32 nritems;
1995         u64 first_ino = 0;
1996         int root_level = btrfs_header_level(root->node);
1997         int i;
1998         int ret = 0; /* Final return value */
1999         int err = 0; /* Positive error bitmap */
2000
2001         cur_bytenr = cur->start;
2002
2003         /* skip to first inode item or the first inode number change */
2004         nritems = btrfs_header_nritems(cur);
2005         for (i = 0; i < nritems; i++) {
2006                 btrfs_item_key_to_cpu(cur, &key, i);
2007                 if (i == 0)
2008                         first_ino = key.objectid;
2009                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2010                     (first_ino && first_ino != key.objectid))
2011                         break;
2012         }
2013         if (i == nritems) {
2014                 path->slots[0] = nritems;
2015                 return 0;
2016         }
2017         path->slots[0] = i;
2018
2019 again:
2020         err |= check_inode_item(root, path, ext_ref);
2021
2022         /* modify cur since check_inode_item may change path */
2023         cur = path->nodes[0];
2024
2025         if (err & LAST_ITEM)
2026                 goto out;
2027
2028         /* still have inode items in thie leaf */
2029         if (cur->start == cur_bytenr)
2030                 goto again;
2031
2032         /*
2033          * we have switched to another leaf, above nodes may
2034          * have changed, here walk down the path, if a node
2035          * or leaf is shared, check whether we can skip this
2036          * node or leaf.
2037          */
2038         for (i = root_level; i >= 0; i--) {
2039                 if (path->nodes[i]->start == nrefs->bytenr[i])
2040                         continue;
2041
2042                 ret = update_nodes_refs(root,
2043                                 path->nodes[i]->start,
2044                                 nrefs, i);
2045                 if (ret)
2046                         goto out;
2047
2048                 if (!nrefs->need_check[i]) {
2049                         *level += 1;
2050                         break;
2051                 }
2052         }
2053
2054         for (i = 0; i < *level; i++) {
2055                 free_extent_buffer(path->nodes[i]);
2056                 path->nodes[i] = NULL;
2057         }
2058 out:
2059         err &= ~LAST_ITEM;
2060         if (err && !ret)
2061                 ret = err;
2062         return ret;
2063 }
2064
2065 static void reada_walk_down(struct btrfs_root *root,
2066                             struct extent_buffer *node, int slot)
2067 {
2068         struct btrfs_fs_info *fs_info = root->fs_info;
2069         u64 bytenr;
2070         u64 ptr_gen;
2071         u32 nritems;
2072         int i;
2073         int level;
2074
2075         level = btrfs_header_level(node);
2076         if (level != 1)
2077                 return;
2078
2079         nritems = btrfs_header_nritems(node);
2080         for (i = slot; i < nritems; i++) {
2081                 bytenr = btrfs_node_blockptr(node, i);
2082                 ptr_gen = btrfs_node_ptr_generation(node, i);
2083                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2084         }
2085 }
2086
2087 /*
2088  * Check the child node/leaf by the following condition:
2089  * 1. the first item key of the node/leaf should be the same with the one
2090  *    in parent.
2091  * 2. block in parent node should match the child node/leaf.
2092  * 3. generation of parent node and child's header should be consistent.
2093  *
2094  * Or the child node/leaf pointed by the key in parent is not valid.
2095  *
2096  * We hope to check leaf owner too, but since subvol may share leaves,
2097  * which makes leaf owner check not so strong, key check should be
2098  * sufficient enough for that case.
2099  */
2100 static int check_child_node(struct extent_buffer *parent, int slot,
2101                             struct extent_buffer *child)
2102 {
2103         struct btrfs_key parent_key;
2104         struct btrfs_key child_key;
2105         int ret = 0;
2106
2107         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2108         if (btrfs_header_level(child) == 0)
2109                 btrfs_item_key_to_cpu(child, &child_key, 0);
2110         else
2111                 btrfs_node_key_to_cpu(child, &child_key, 0);
2112
2113         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2114                 ret = -EINVAL;
2115                 fprintf(stderr,
2116                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2117                         parent_key.objectid, parent_key.type, parent_key.offset,
2118                         child_key.objectid, child_key.type, child_key.offset);
2119         }
2120         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2121                 ret = -EINVAL;
2122                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2123                         btrfs_node_blockptr(parent, slot),
2124                         btrfs_header_bytenr(child));
2125         }
2126         if (btrfs_node_ptr_generation(parent, slot) !=
2127             btrfs_header_generation(child)) {
2128                 ret = -EINVAL;
2129                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2130                         btrfs_header_generation(child),
2131                         btrfs_node_ptr_generation(parent, slot));
2132         }
2133         return ret;
2134 }
2135
2136 /*
2137  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2138  * in every fs or file tree check. Here we find its all root ids, and only check
2139  * it in the fs or file tree which has the smallest root id.
2140  */
2141 static int need_check(struct btrfs_root *root, struct ulist *roots)
2142 {
2143         struct rb_node *node;
2144         struct ulist_node *u;
2145
2146         if (roots->nnodes == 1)
2147                 return 1;
2148
2149         node = rb_first(&roots->root);
2150         u = rb_entry(node, struct ulist_node, rb_node);
2151         /*
2152          * current root id is not smallest, we skip it and let it be checked
2153          * in the fs or file tree who hash the smallest root id.
2154          */
2155         if (root->objectid != u->val)
2156                 return 0;
2157
2158         return 1;
2159 }
2160
2161 /*
2162  * for a tree node or leaf, we record its reference count, so later if we still
2163  * process this node or leaf, don't need to compute its reference count again.
2164  */
2165 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2166                              struct node_refs *nrefs, u64 level)
2167 {
2168         int check, ret;
2169         u64 refs;
2170         struct ulist *roots;
2171
2172         if (nrefs->bytenr[level] != bytenr) {
2173                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2174                                        level, 1, &refs, NULL);
2175                 if (ret < 0)
2176                         return ret;
2177
2178                 nrefs->bytenr[level] = bytenr;
2179                 nrefs->refs[level] = refs;
2180                 if (refs > 1) {
2181                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2182                                                    0, &roots);
2183                         if (ret)
2184                                 return -EIO;
2185
2186                         check = need_check(root, roots);
2187                         ulist_free(roots);
2188                         nrefs->need_check[level] = check;
2189                 } else {
2190                         nrefs->need_check[level] = 1;
2191                 }
2192         }
2193
2194         return 0;
2195 }
2196
2197 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2198                           struct walk_control *wc, int *level,
2199                           struct node_refs *nrefs)
2200 {
2201         enum btrfs_tree_block_status status;
2202         u64 bytenr;
2203         u64 ptr_gen;
2204         struct btrfs_fs_info *fs_info = root->fs_info;
2205         struct extent_buffer *next;
2206         struct extent_buffer *cur;
2207         int ret, err = 0;
2208         u64 refs;
2209
2210         WARN_ON(*level < 0);
2211         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2212
2213         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2214                 refs = nrefs->refs[*level];
2215                 ret = 0;
2216         } else {
2217                 ret = btrfs_lookup_extent_info(NULL, root,
2218                                        path->nodes[*level]->start,
2219                                        *level, 1, &refs, NULL);
2220                 if (ret < 0) {
2221                         err = ret;
2222                         goto out;
2223                 }
2224                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2225                 nrefs->refs[*level] = refs;
2226         }
2227
2228         if (refs > 1) {
2229                 ret = enter_shared_node(root, path->nodes[*level]->start,
2230                                         refs, wc, *level);
2231                 if (ret > 0) {
2232                         err = ret;
2233                         goto out;
2234                 }
2235         }
2236
2237         while (*level >= 0) {
2238                 WARN_ON(*level < 0);
2239                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2240                 cur = path->nodes[*level];
2241
2242                 if (btrfs_header_level(cur) != *level)
2243                         WARN_ON(1);
2244
2245                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2246                         break;
2247                 if (*level == 0) {
2248                         ret = process_one_leaf(root, cur, wc);
2249                         if (ret < 0)
2250                                 err = ret;
2251                         break;
2252                 }
2253                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2254                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2255
2256                 if (bytenr == nrefs->bytenr[*level - 1]) {
2257                         refs = nrefs->refs[*level - 1];
2258                 } else {
2259                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2260                                         *level - 1, 1, &refs, NULL);
2261                         if (ret < 0) {
2262                                 refs = 0;
2263                         } else {
2264                                 nrefs->bytenr[*level - 1] = bytenr;
2265                                 nrefs->refs[*level - 1] = refs;
2266                         }
2267                 }
2268
2269                 if (refs > 1) {
2270                         ret = enter_shared_node(root, bytenr, refs,
2271                                                 wc, *level - 1);
2272                         if (ret > 0) {
2273                                 path->slots[*level]++;
2274                                 continue;
2275                         }
2276                 }
2277
2278                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2279                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2280                         free_extent_buffer(next);
2281                         reada_walk_down(root, cur, path->slots[*level]);
2282                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2283                         if (!extent_buffer_uptodate(next)) {
2284                                 struct btrfs_key node_key;
2285
2286                                 btrfs_node_key_to_cpu(path->nodes[*level],
2287                                                       &node_key,
2288                                                       path->slots[*level]);
2289                                 btrfs_add_corrupt_extent_record(root->fs_info,
2290                                                 &node_key,
2291                                                 path->nodes[*level]->start,
2292                                                 root->fs_info->nodesize,
2293                                                 *level);
2294                                 err = -EIO;
2295                                 goto out;
2296                         }
2297                 }
2298
2299                 ret = check_child_node(cur, path->slots[*level], next);
2300                 if (ret) {
2301                         free_extent_buffer(next);
2302                         err = ret;
2303                         goto out;
2304                 }
2305
2306                 if (btrfs_is_leaf(next))
2307                         status = btrfs_check_leaf(root, NULL, next);
2308                 else
2309                         status = btrfs_check_node(root, NULL, next);
2310                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2311                         free_extent_buffer(next);
2312                         err = -EIO;
2313                         goto out;
2314                 }
2315
2316                 *level = *level - 1;
2317                 free_extent_buffer(path->nodes[*level]);
2318                 path->nodes[*level] = next;
2319                 path->slots[*level] = 0;
2320         }
2321 out:
2322         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2323         return err;
2324 }
2325
/*
 * Repeated forward declaration of check_inode_item(); the same prototype is
 * already declared earlier in this file.
 */
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
2328
2329 /*
2330  * Returns >0  Found error, should continue
2331  * Returns <0  Fatal error, must exit the whole check
2332  * Returns 0   No errors found
2333  */
2334 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2335                              int *level, struct node_refs *nrefs, int ext_ref)
2336 {
2337         enum btrfs_tree_block_status status;
2338         u64 bytenr;
2339         u64 ptr_gen;
2340         struct btrfs_fs_info *fs_info = root->fs_info;
2341         struct extent_buffer *next;
2342         struct extent_buffer *cur;
2343         int ret;
2344
2345         WARN_ON(*level < 0);
2346         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2347
2348         ret = update_nodes_refs(root, path->nodes[*level]->start,
2349                                 nrefs, *level);
2350         if (ret < 0)
2351                 return ret;
2352
2353         while (*level >= 0) {
2354                 WARN_ON(*level < 0);
2355                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2356                 cur = path->nodes[*level];
2357
2358                 if (btrfs_header_level(cur) != *level)
2359                         WARN_ON(1);
2360
2361                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2362                         break;
2363                 /* Don't forgot to check leaf/node validation */
2364                 if (*level == 0) {
2365                         ret = btrfs_check_leaf(root, NULL, cur);
2366                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2367                                 ret = -EIO;
2368                                 break;
2369                         }
2370                         ret = process_one_leaf_v2(root, path, nrefs,
2371                                                   level, ext_ref);
2372                         cur = path->nodes[*level];
2373                         break;
2374                 } else {
2375                         ret = btrfs_check_node(root, NULL, cur);
2376                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2377                                 ret = -EIO;
2378                                 break;
2379                         }
2380                 }
2381                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2382                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2383
2384                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2385                 if (ret)
2386                         break;
2387                 if (!nrefs->need_check[*level - 1]) {
2388                         path->slots[*level]++;
2389                         continue;
2390                 }
2391
2392                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2393                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2394                         free_extent_buffer(next);
2395                         reada_walk_down(root, cur, path->slots[*level]);
2396                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2397                         if (!extent_buffer_uptodate(next)) {
2398                                 struct btrfs_key node_key;
2399
2400                                 btrfs_node_key_to_cpu(path->nodes[*level],
2401                                                       &node_key,
2402                                                       path->slots[*level]);
2403                                 btrfs_add_corrupt_extent_record(fs_info,
2404                                                 &node_key,
2405                                                 path->nodes[*level]->start,
2406                                                 fs_info->nodesize,
2407                                                 *level);
2408                                 ret = -EIO;
2409                                 break;
2410                         }
2411                 }
2412
2413                 ret = check_child_node(cur, path->slots[*level], next);
2414                 if (ret < 0) 
2415                         break;
2416
2417                 if (btrfs_is_leaf(next))
2418                         status = btrfs_check_leaf(root, NULL, next);
2419                 else
2420                         status = btrfs_check_node(root, NULL, next);
2421                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2422                         free_extent_buffer(next);
2423                         ret = -EIO;
2424                         break;
2425                 }
2426
2427                 *level = *level - 1;
2428                 free_extent_buffer(path->nodes[*level]);
2429                 path->nodes[*level] = next;
2430                 path->slots[*level] = 0;
2431         }
2432         return ret;
2433 }
2434
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436                         struct walk_control *wc, int *level)
2437 {
2438         int i;
2439         struct extent_buffer *leaf;
2440
2441         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2442                 leaf = path->nodes[i];
2443                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2444                         path->slots[i]++;
2445                         *level = i;
2446                         return 0;
2447                 } else {
2448                         free_extent_buffer(path->nodes[*level]);
2449                         path->nodes[*level] = NULL;
2450                         BUG_ON(*level > wc->active_node);
2451                         if (*level == wc->active_node)
2452                                 leave_shared_node(root, wc, *level);
2453                         *level = i + 1;
2454                 }
2455         }
2456         return 1;
2457 }
2458
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2460                            int *level)
2461 {
2462         int i;
2463         struct extent_buffer *leaf;
2464
2465         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2466                 leaf = path->nodes[i];
2467                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2468                         path->slots[i]++;
2469                         *level = i;
2470                         return 0;
2471                 } else {
2472                         free_extent_buffer(path->nodes[*level]);
2473                         path->nodes[*level] = NULL;
2474                         *level = i + 1;
2475                 }
2476         }
2477         return 1;
2478 }
2479
2480 static int check_root_dir(struct inode_record *rec)
2481 {
2482         struct inode_backref *backref;
2483         int ret = -1;
2484
2485         if (!rec->found_inode_item || rec->errors)
2486                 goto out;
2487         if (rec->nlink != 1 || rec->found_link != 0)
2488                 goto out;
2489         if (list_empty(&rec->backrefs))
2490                 goto out;
2491         backref = to_inode_backref(rec->backrefs.next);
2492         if (!backref->found_inode_ref)
2493                 goto out;
2494         if (backref->index != 0 || backref->namelen != 2 ||
2495             memcmp(backref->name, "..", 2))
2496                 goto out;
2497         if (backref->found_dir_index || backref->found_dir_item)
2498                 goto out;
2499         ret = 0;
2500 out:
2501         return ret;
2502 }
2503
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505                               struct btrfs_root *root, struct btrfs_path *path,
2506                               struct inode_record *rec)
2507 {
2508         struct btrfs_inode_item *ei;
2509         struct btrfs_key key;
2510         int ret;
2511
2512         key.objectid = rec->ino;
2513         key.type = BTRFS_INODE_ITEM_KEY;
2514         key.offset = (u64)-1;
2515
2516         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2517         if (ret < 0)
2518                 goto out;
2519         if (ret) {
2520                 if (!path->slots[0]) {
2521                         ret = -ENOENT;
2522                         goto out;
2523                 }
2524                 path->slots[0]--;
2525                 ret = 0;
2526         }
2527         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2528         if (key.objectid != rec->ino) {
2529                 ret = -ENOENT;
2530                 goto out;
2531         }
2532
2533         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534                             struct btrfs_inode_item);
2535         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536         btrfs_mark_buffer_dirty(path->nodes[0]);
2537         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2539                root->root_key.objectid);
2540 out:
2541         btrfs_release_path(path);
2542         return ret;
2543 }
2544
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546                                     struct btrfs_root *root,
2547                                     struct btrfs_path *path,
2548                                     struct inode_record *rec)
2549 {
2550         int ret;
2551
2552         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553         btrfs_release_path(path);
2554         if (!ret)
2555                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2556         return ret;
2557 }
2558
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560                                struct btrfs_root *root,
2561                                struct btrfs_path *path,
2562                                struct inode_record *rec)
2563 {
2564         struct btrfs_inode_item *ei;
2565         struct btrfs_key key;
2566         int ret = 0;
2567
2568         key.objectid = rec->ino;
2569         key.type = BTRFS_INODE_ITEM_KEY;
2570         key.offset = 0;
2571
2572         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2573         if (ret) {
2574                 if (ret > 0)
2575                         ret = -ENOENT;
2576                 goto out;
2577         }
2578
2579         /* Since ret == 0, no need to check anything */
2580         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581                             struct btrfs_inode_item);
2582         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583         btrfs_mark_buffer_dirty(path->nodes[0]);
2584         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585         printf("reset nbytes for ino %llu root %llu\n",
2586                rec->ino, root->root_key.objectid);
2587 out:
2588         btrfs_release_path(path);
2589         return ret;
2590 }
2591
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593                                  struct cache_tree *inode_cache,
2594                                  struct inode_record *rec,
2595                                  struct inode_backref *backref)
2596 {
2597         struct btrfs_path path;
2598         struct btrfs_trans_handle *trans;
2599         struct btrfs_dir_item *dir_item;
2600         struct extent_buffer *leaf;
2601         struct btrfs_key key;
2602         struct btrfs_disk_key disk_key;
2603         struct inode_record *dir_rec;
2604         unsigned long name_ptr;
2605         u32 data_size = sizeof(*dir_item) + backref->namelen;
2606         int ret;
2607
2608         trans = btrfs_start_transaction(root, 1);
2609         if (IS_ERR(trans))
2610                 return PTR_ERR(trans);
2611
2612         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613                 (unsigned long long)rec->ino);
2614
2615         btrfs_init_path(&path);
2616         key.objectid = backref->dir;
2617         key.type = BTRFS_DIR_INDEX_KEY;
2618         key.offset = backref->index;
2619         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2620         BUG_ON(ret);
2621
2622         leaf = path.nodes[0];
2623         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2624
2625         disk_key.objectid = cpu_to_le64(rec->ino);
2626         disk_key.type = BTRFS_INODE_ITEM_KEY;
2627         disk_key.offset = 0;
2628
2629         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631         btrfs_set_dir_data_len(leaf, dir_item, 0);
2632         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2633         name_ptr = (unsigned long)(dir_item + 1);
2634         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635         btrfs_mark_buffer_dirty(leaf);
2636         btrfs_release_path(&path);
2637         btrfs_commit_transaction(trans, root);
2638
2639         backref->found_dir_index = 1;
2640         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641         BUG_ON(IS_ERR(dir_rec));
2642         if (!dir_rec)
2643                 return 0;
2644         dir_rec->found_size += backref->namelen;
2645         if (dir_rec->found_size == dir_rec->isize &&
2646             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648         if (dir_rec->found_size != dir_rec->isize)
2649                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2650
2651         return 0;
2652 }
2653
2654 static int delete_dir_index(struct btrfs_root *root,
2655                             struct inode_backref *backref)
2656 {
2657         struct btrfs_trans_handle *trans;
2658         struct btrfs_dir_item *di;
2659         struct btrfs_path path;
2660         int ret = 0;
2661
2662         trans = btrfs_start_transaction(root, 1);
2663         if (IS_ERR(trans))
2664                 return PTR_ERR(trans);
2665
2666         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667                 (unsigned long long)backref->dir,
2668                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669                 (unsigned long long)root->objectid);
2670
2671         btrfs_init_path(&path);
2672         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673                                     backref->name, backref->namelen,
2674                                     backref->index, -1);
2675         if (IS_ERR(di)) {
2676                 ret = PTR_ERR(di);
2677                 btrfs_release_path(&path);
2678                 btrfs_commit_transaction(trans, root);
2679                 if (ret == -ENOENT)
2680                         return 0;
2681                 return ret;
2682         }
2683
2684         if (!di)
2685                 ret = btrfs_del_item(trans, root, &path);
2686         else
2687                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2688         BUG_ON(ret);
2689         btrfs_release_path(&path);
2690         btrfs_commit_transaction(trans, root);
2691         return ret;
2692 }
2693
2694 static int __create_inode_item(struct btrfs_trans_handle *trans,
2695                                struct btrfs_root *root, u64 ino, u64 size,
2696                                u64 nbytes, u64 nlink, u32 mode)
2697 {
2698         struct btrfs_inode_item ii;
2699         time_t now = time(NULL);
2700         int ret;
2701
2702         btrfs_set_stack_inode_size(&ii, size);
2703         btrfs_set_stack_inode_nbytes(&ii, nbytes);
2704         btrfs_set_stack_inode_nlink(&ii, nlink);
2705         btrfs_set_stack_inode_mode(&ii, mode);
2706         btrfs_set_stack_inode_generation(&ii, trans->transid);
2707         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2708         btrfs_set_stack_timespec_sec(&ii.ctime, now);
2709         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2710         btrfs_set_stack_timespec_sec(&ii.mtime, now);
2711         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2712         btrfs_set_stack_timespec_sec(&ii.otime, 0);
2713         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2714
2715         ret = btrfs_insert_inode(trans, root, ino, &ii);
2716         ASSERT(!ret);
2717
2718         warning("root %llu inode %llu recreating inode item, this may "
2719                 "be incomplete, please check permissions and content after "
2720                 "the fsck completes.\n", (unsigned long long)root->objectid,
2721                 (unsigned long long)ino);
2722
2723         return 0;
2724 }
2725
2726 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2727                                     struct btrfs_root *root, u64 ino,
2728                                     u8 filetype)
2729 {
2730         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2731
2732         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
2733 }
2734
2735 static int create_inode_item(struct btrfs_root *root,
2736                              struct inode_record *rec, int root_dir)
2737 {
2738         struct btrfs_trans_handle *trans;
2739         u64 nlink = 0;
2740         u32 mode = 0;
2741         u64 size = 0;
2742         int ret;
2743
2744         trans = btrfs_start_transaction(root, 1);
2745         if (IS_ERR(trans)) {
2746                 ret = PTR_ERR(trans);
2747                 return ret;
2748         }
2749
2750         nlink = root_dir ? 1 : rec->found_link;
2751         if (rec->found_dir_item) {
2752                 if (rec->found_file_extent)
2753                         fprintf(stderr, "root %llu inode %llu has both a dir "
2754                                 "item and extents, unsure if it is a dir or a "
2755                                 "regular file so setting it as a directory\n",
2756                                 (unsigned long long)root->objectid,
2757                                 (unsigned long long)rec->ino);
2758                 mode = S_IFDIR | 0755;
2759                 size = rec->found_size;
2760         } else if (!rec->found_dir_item) {
2761                 size = rec->extent_end;
2762                 mode =  S_IFREG | 0755;
2763         }
2764
2765         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2766                                   nlink, mode);
2767         btrfs_commit_transaction(trans, root);
2768         return 0;
2769 }
2770
2771 static int repair_inode_backrefs(struct btrfs_root *root,
2772                                  struct inode_record *rec,
2773                                  struct cache_tree *inode_cache,
2774                                  int delete)
2775 {
2776         struct inode_backref *tmp, *backref;
2777         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2778         int ret = 0;
2779         int repaired = 0;
2780
2781         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2782                 if (!delete && rec->ino == root_dirid) {
2783                         if (!rec->found_inode_item) {
2784                                 ret = create_inode_item(root, rec, 1);
2785                                 if (ret)
2786                                         break;
2787                                 repaired++;
2788                         }
2789                 }
2790
2791                 /* Index 0 for root dir's are special, don't mess with it */
2792                 if (rec->ino == root_dirid && backref->index == 0)
2793                         continue;
2794
2795                 if (delete &&
2796                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2797                      (backref->found_dir_index && backref->found_inode_ref &&
2798                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2799                         ret = delete_dir_index(root, backref);
2800                         if (ret)
2801                                 break;
2802                         repaired++;
2803                         list_del(&backref->list);
2804                         free(backref);
2805                         continue;
2806                 }
2807
2808                 if (!delete && !backref->found_dir_index &&
2809                     backref->found_dir_item && backref->found_inode_ref) {
2810                         ret = add_missing_dir_index(root, inode_cache, rec,
2811                                                     backref);
2812                         if (ret)
2813                                 break;
2814                         repaired++;
2815                         if (backref->found_dir_item &&
2816                             backref->found_dir_index) {
2817                                 if (!backref->errors &&
2818                                     backref->found_inode_ref) {
2819                                         list_del(&backref->list);
2820                                         free(backref);
2821                                         continue;
2822                                 }
2823                         }
2824                 }
2825
2826                 if (!delete && (!backref->found_dir_index &&
2827                                 !backref->found_dir_item &&
2828                                 backref->found_inode_ref)) {
2829                         struct btrfs_trans_handle *trans;
2830                         struct btrfs_key location;
2831
2832                         ret = check_dir_conflict(root, backref->name,
2833                                                  backref->namelen,
2834                                                  backref->dir,
2835                                                  backref->index);
2836                         if (ret) {
2837                                 /*
2838                                  * let nlink fixing routine to handle it,
2839                                  * which can do it better.
2840                                  */
2841                                 ret = 0;
2842                                 break;
2843                         }
2844                         location.objectid = rec->ino;
2845                         location.type = BTRFS_INODE_ITEM_KEY;
2846                         location.offset = 0;
2847
2848                         trans = btrfs_start_transaction(root, 1);
2849                         if (IS_ERR(trans)) {
2850                                 ret = PTR_ERR(trans);
2851                                 break;
2852                         }
2853                         fprintf(stderr, "adding missing dir index/item pair "
2854                                 "for inode %llu\n",
2855                                 (unsigned long long)rec->ino);
2856                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2857                                                     backref->namelen,
2858                                                     backref->dir, &location,
2859                                                     imode_to_type(rec->imode),
2860                                                     backref->index);
2861                         BUG_ON(ret);
2862                         btrfs_commit_transaction(trans, root);
2863                         repaired++;
2864                 }
2865
2866                 if (!delete && (backref->found_inode_ref &&
2867                                 backref->found_dir_index &&
2868                                 backref->found_dir_item &&
2869                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2870                                 !rec->found_inode_item)) {
2871                         ret = create_inode_item(root, rec, 0);
2872                         if (ret)
2873                                 break;
2874                         repaired++;
2875                 }
2876
2877         }
2878         return ret ? ret : repaired;
2879 }
2880
2881 /*
2882  * To determine the file type for nlink/inode_item repair
2883  *
2884  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2885  * Return -ENOENT if file type is not found.
2886  */
2887 static int find_file_type(struct inode_record *rec, u8 *type)
2888 {
2889         struct inode_backref *backref;
2890
2891         /* For inode item recovered case */
2892         if (rec->found_inode_item) {
2893                 *type = imode_to_type(rec->imode);
2894                 return 0;
2895         }
2896
2897         list_for_each_entry(backref, &rec->backrefs, list) {
2898                 if (backref->found_dir_index || backref->found_dir_item) {
2899                         *type = backref->filetype;
2900                         return 0;
2901                 }
2902         }
2903         return -ENOENT;
2904 }
2905
2906 /*
2907  * To determine the file name for nlink repair
2908  *
2909  * Return 0 if file name is found, set name and namelen.
2910  * Return -ENOENT if file name is not found.
2911  */
2912 static int find_file_name(struct inode_record *rec,
2913                           char *name, int *namelen)
2914 {
2915         struct inode_backref *backref;
2916
2917         list_for_each_entry(backref, &rec->backrefs, list) {
2918                 if (backref->found_dir_index || backref->found_dir_item ||
2919                     backref->found_inode_ref) {
2920                         memcpy(name, backref->name, backref->namelen);
2921                         *namelen = backref->namelen;
2922                         return 0;
2923                 }
2924         }
2925         return -ENOENT;
2926 }
2927
2928 /* Reset the nlink of the inode to the correct one */
2929 static int reset_nlink(struct btrfs_trans_handle *trans,
2930                        struct btrfs_root *root,
2931                        struct btrfs_path *path,
2932                        struct inode_record *rec)
2933 {
2934         struct inode_backref *backref;
2935         struct inode_backref *tmp;
2936         struct btrfs_key key;
2937         struct btrfs_inode_item *inode_item;
2938         int ret = 0;
2939
2940         /* We don't believe this either, reset it and iterate backref */
2941         rec->found_link = 0;
2942
2943         /* Remove all backref including the valid ones */
2944         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2945                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2946                                    backref->index, backref->name,
2947                                    backref->namelen, 0);
2948                 if (ret < 0)
2949                         goto out;
2950
2951                 /* remove invalid backref, so it won't be added back */
2952                 if (!(backref->found_dir_index &&
2953                       backref->found_dir_item &&
2954                       backref->found_inode_ref)) {
2955                         list_del(&backref->list);
2956                         free(backref);
2957                 } else {
2958                         rec->found_link++;
2959                 }
2960         }
2961
2962         /* Set nlink to 0 */
2963         key.objectid = rec->ino;
2964         key.type = BTRFS_INODE_ITEM_KEY;
2965         key.offset = 0;
2966         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2967         if (ret < 0)
2968                 goto out;
2969         if (ret > 0) {
2970                 ret = -ENOENT;
2971                 goto out;
2972         }
2973         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2974                                     struct btrfs_inode_item);
2975         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2976         btrfs_mark_buffer_dirty(path->nodes[0]);
2977         btrfs_release_path(path);
2978
2979         /*
2980          * Add back valid inode_ref/dir_item/dir_index,
2981          * add_link() will handle the nlink inc, so new nlink must be correct
2982          */
2983         list_for_each_entry(backref, &rec->backrefs, list) {
2984                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2985                                      backref->name, backref->namelen,
2986                                      backref->filetype, &backref->index, 1, 0);
2987                 if (ret < 0)
2988                         goto out;
2989         }
2990 out:
2991         btrfs_release_path(path);
2992         return ret;
2993 }
2994
2995 static int get_highest_inode(struct btrfs_trans_handle *trans,
2996                                 struct btrfs_root *root,
2997                                 struct btrfs_path *path,
2998                                 u64 *highest_ino)
2999 {
3000         struct btrfs_key key, found_key;
3001         int ret;
3002
3003         btrfs_init_path(path);
3004         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3005         key.offset = -1;
3006         key.type = BTRFS_INODE_ITEM_KEY;
3007         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3008         if (ret == 1) {
3009                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3010                                 path->slots[0] - 1);
3011                 *highest_ino = found_key.objectid;
3012                 ret = 0;
3013         }
3014         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3015                 ret = -EOVERFLOW;
3016         btrfs_release_path(path);
3017         return ret;
3018 }
3019
3020 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3021                                struct btrfs_root *root,
3022                                struct btrfs_path *path,
3023                                struct inode_record *rec)
3024 {
3025         char *dir_name = "lost+found";
3026         char namebuf[BTRFS_NAME_LEN] = {0};
3027         u64 lost_found_ino;
3028         u32 mode = 0700;
3029         u8 type = 0;
3030         int namelen = 0;
3031         int name_recovered = 0;
3032         int type_recovered = 0;
3033         int ret = 0;
3034
3035         /*
3036          * Get file name and type first before these invalid inode ref
3037          * are deleted by remove_all_invalid_backref()
3038          */
3039         name_recovered = !find_file_name(rec, namebuf, &namelen);
3040         type_recovered = !find_file_type(rec, &type);
3041
3042         if (!name_recovered) {
3043                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3044                        rec->ino, rec->ino);
3045                 namelen = count_digits(rec->ino);
3046                 sprintf(namebuf, "%llu", rec->ino);
3047                 name_recovered = 1;
3048         }
3049         if (!type_recovered) {
3050                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3051                        rec->ino);
3052                 type = BTRFS_FT_REG_FILE;
3053                 type_recovered = 1;
3054         }
3055
3056         ret = reset_nlink(trans, root, path, rec);
3057         if (ret < 0) {
3058                 fprintf(stderr,
3059                         "Failed to reset nlink for inode %llu: %s\n",
3060                         rec->ino, strerror(-ret));
3061                 goto out;
3062         }
3063
3064         if (rec->found_link == 0) {
3065                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3066                 if (ret < 0)
3067                         goto out;
3068                 lost_found_ino++;
3069                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3070                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3071                                   mode);
3072                 if (ret < 0) {
3073                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
3074                                 dir_name, strerror(-ret));
3075                         goto out;
3076                 }
3077                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3078                                      namebuf, namelen, type, NULL, 1, 0);
3079                 /*
3080                  * Add ".INO" suffix several times to handle case where
3081                  * "FILENAME.INO" is already taken by another file.
3082                  */
3083                 while (ret == -EEXIST) {
3084                         /*
3085                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
3086                          */
3087                         if (namelen + count_digits(rec->ino) + 1 >
3088                             BTRFS_NAME_LEN) {
3089                                 ret = -EFBIG;
3090                                 goto out;
3091                         }
3092                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3093                                  ".%llu", rec->ino);
3094                         namelen += count_digits(rec->ino) + 1;
3095                         ret = btrfs_add_link(trans, root, rec->ino,
3096                                              lost_found_ino, namebuf,
3097                                              namelen, type, NULL, 1, 0);
3098                 }
3099                 if (ret < 0) {
3100                         fprintf(stderr,
3101                                 "Failed to link the inode %llu to %s dir: %s\n",
3102                                 rec->ino, dir_name, strerror(-ret));
3103                         goto out;
3104                 }
3105                 /*
3106                  * Just increase the found_link, don't actually add the
3107                  * backref. This will make things easier and this inode
3108                  * record will be freed after the repair is done.
3109                  * So fsck will not report problem about this inode.
3110                  */
3111                 rec->found_link++;
3112                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3113                        namelen, namebuf, dir_name);
3114         }
3115         printf("Fixed the nlink of inode %llu\n", rec->ino);
3116 out:
3117         /*
3118          * Clear the flag anyway, or we will loop forever for the same inode
3119          * as it will not be removed from the bad inode list and the dead loop
3120          * happens.
3121          */
3122         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3123         btrfs_release_path(path);
3124         return ret;
3125 }
3126
3127 /*
3128  * Check if there is any normal(reg or prealloc) file extent for given
3129  * ino.
3130  * This is used to determine the file type when neither its dir_index/item or
3131  * inode_item exists.
3132  *
3133  * This will *NOT* report error, if any error happens, just consider it does
3134  * not have any normal file extent.
3135  */
3136 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3137 {
3138         struct btrfs_path path;
3139         struct btrfs_key key;
3140         struct btrfs_key found_key;
3141         struct btrfs_file_extent_item *fi;
3142         u8 type;
3143         int ret = 0;
3144
3145         btrfs_init_path(&path);
3146         key.objectid = ino;
3147         key.type = BTRFS_EXTENT_DATA_KEY;
3148         key.offset = 0;
3149
3150         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3151         if (ret < 0) {
3152                 ret = 0;
3153                 goto out;
3154         }
3155         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3156                 ret = btrfs_next_leaf(root, &path);
3157                 if (ret) {
3158                         ret = 0;
3159                         goto out;
3160                 }
3161         }
3162         while (1) {
3163                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3164                                       path.slots[0]);
3165                 if (found_key.objectid != ino ||
3166                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3167                         break;
3168                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3169                                     struct btrfs_file_extent_item);
3170                 type = btrfs_file_extent_type(path.nodes[0], fi);
3171                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3172                         ret = 1;
3173                         goto out;
3174                 }
3175         }
3176 out:
3177         btrfs_release_path(&path);
3178         return ret;
3179 }
3180
3181 static u32 btrfs_type_to_imode(u8 type)
3182 {
3183         static u32 imode_by_btrfs_type[] = {
3184                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3185                 [BTRFS_FT_DIR]          = S_IFDIR,
3186                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3187                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3188                 [BTRFS_FT_FIFO]         = S_IFIFO,
3189                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3190                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3191         };
3192
3193         return imode_by_btrfs_type[(type)];
3194 }
3195
3196 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3197                                 struct btrfs_root *root,
3198                                 struct btrfs_path *path,
3199                                 struct inode_record *rec)
3200 {
3201         u8 filetype;
3202         u32 mode = 0700;
3203         int type_recovered = 0;
3204         int ret = 0;
3205
3206         printf("Trying to rebuild inode:%llu\n", rec->ino);
3207
3208         type_recovered = !find_file_type(rec, &filetype);
3209
3210         /*
3211          * Try to determine inode type if type not found.
3212          *
3213          * For found regular file extent, it must be FILE.
3214          * For found dir_item/index, it must be DIR.
3215          *
3216          * For undetermined one, use FILE as fallback.
3217          *
3218          * TODO:
3219          * 1. If found backref(inode_index/item is already handled) to it,
3220          *    it must be DIR.
3221          *    Need new inode-inode ref structure to allow search for that.
3222          */
3223         if (!type_recovered) {
3224                 if (rec->found_file_extent &&
3225                     find_normal_file_extent(root, rec->ino)) {
3226                         type_recovered = 1;
3227                         filetype = BTRFS_FT_REG_FILE;
3228                 } else if (rec->found_dir_item) {
3229                         type_recovered = 1;
3230                         filetype = BTRFS_FT_DIR;
3231                 } else if (!list_empty(&rec->orphan_extents)) {
3232                         type_recovered = 1;
3233                         filetype = BTRFS_FT_REG_FILE;
3234                 } else{
3235                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3236                                rec->ino);
3237                         type_recovered = 1;
3238                         filetype = BTRFS_FT_REG_FILE;
3239                 }
3240         }
3241
3242         ret = btrfs_new_inode(trans, root, rec->ino,
3243                               mode | btrfs_type_to_imode(filetype));
3244         if (ret < 0)
3245                 goto out;
3246
3247         /*
3248          * Here inode rebuild is done, we only rebuild the inode item,
3249          * don't repair the nlink(like move to lost+found).
3250          * That is the job of nlink repair.
3251          *
3252          * We just fill the record and return
3253          */
3254         rec->found_dir_item = 1;
3255         rec->imode = mode | btrfs_type_to_imode(filetype);
3256         rec->nlink = 0;
3257         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3258         /* Ensure the inode_nlinks repair function will be called */
3259         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3260 out:
3261         return ret;
3262 }
3263
3264 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3265                                       struct btrfs_root *root,
3266                                       struct btrfs_path *path,
3267                                       struct inode_record *rec)
3268 {
3269         struct orphan_data_extent *orphan;
3270         struct orphan_data_extent *tmp;
3271         int ret = 0;
3272
3273         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3274                 /*
3275                  * Check for conflicting file extents
3276                  *
3277                  * Here we don't know whether the extents is compressed or not,
3278                  * so we can only assume it not compressed nor data offset,
3279                  * and use its disk_len as extent length.
3280                  */
3281                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3282                                        orphan->offset, orphan->disk_len, 0);
3283                 btrfs_release_path(path);
3284                 if (ret < 0)
3285                         goto out;
3286                 if (!ret) {
3287                         fprintf(stderr,
3288                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3289                                 orphan->disk_bytenr, orphan->disk_len);
3290                         ret = btrfs_free_extent(trans,
3291                                         root->fs_info->extent_root,
3292                                         orphan->disk_bytenr, orphan->disk_len,
3293                                         0, root->objectid, orphan->objectid,
3294                                         orphan->offset);
3295                         if (ret < 0)
3296                                 goto out;
3297                 }
3298                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3299                                 orphan->offset, orphan->disk_bytenr,
3300                                 orphan->disk_len, orphan->disk_len);
3301                 if (ret < 0)
3302                         goto out;
3303
3304                 /* Update file size info */
3305                 rec->found_size += orphan->disk_len;
3306                 if (rec->found_size == rec->nbytes)
3307                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3308
3309                 /* Update the file extent hole info too */
3310                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3311                                            orphan->disk_len);
3312                 if (ret < 0)
3313                         goto out;
3314                 if (RB_EMPTY_ROOT(&rec->holes))
3315                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3316
3317                 list_del(&orphan->list);
3318                 free(orphan);
3319         }
3320         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3321 out:
3322         return ret;
3323 }
3324
3325 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3326                                         struct btrfs_root *root,
3327                                         struct btrfs_path *path,
3328                                         struct inode_record *rec)
3329 {
3330         struct rb_node *node;
3331         struct file_extent_hole *hole;
3332         int found = 0;
3333         int ret = 0;
3334
3335         node = rb_first(&rec->holes);
3336
3337         while (node) {
3338                 found = 1;
3339                 hole = rb_entry(node, struct file_extent_hole, node);
3340                 ret = btrfs_punch_hole(trans, root, rec->ino,
3341                                        hole->start, hole->len);
3342                 if (ret < 0)
3343                         goto out;
3344                 ret = del_file_extent_hole(&rec->holes, hole->start,
3345                                            hole->len);
3346                 if (ret < 0)
3347                         goto out;
3348                 if (RB_EMPTY_ROOT(&rec->holes))
3349                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3350                 node = rb_first(&rec->holes);
3351         }
3352         /* special case for a file losing all its file extent */
3353         if (!found) {
3354                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3355                                        round_up(rec->isize,
3356                                                 root->fs_info->sectorsize));
3357                 if (ret < 0)
3358                         goto out;
3359         }
3360         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3361                rec->ino, root->objectid);
3362 out:
3363         return ret;
3364 }
3365
3366 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3367 {
3368         struct btrfs_trans_handle *trans;
3369         struct btrfs_path path;
3370         int ret = 0;
3371
3372         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3373                              I_ERR_NO_ORPHAN_ITEM |
3374                              I_ERR_LINK_COUNT_WRONG |
3375                              I_ERR_NO_INODE_ITEM |
3376                              I_ERR_FILE_EXTENT_ORPHAN |
3377                              I_ERR_FILE_EXTENT_DISCOUNT|
3378                              I_ERR_FILE_NBYTES_WRONG)))
3379                 return rec->errors;
3380
3381         /*
3382          * For nlink repair, it may create a dir and add link, so
3383          * 2 for parent(256)'s dir_index and dir_item
3384          * 2 for lost+found dir's inode_item and inode_ref
3385          * 1 for the new inode_ref of the file
3386          * 2 for lost+found dir's dir_index and dir_item for the file
3387          */
3388         trans = btrfs_start_transaction(root, 7);
3389         if (IS_ERR(trans))
3390                 return PTR_ERR(trans);
3391
3392         btrfs_init_path(&path);
3393         if (rec->errors & I_ERR_NO_INODE_ITEM)
3394                 ret = repair_inode_no_item(trans, root, &path, rec);
3395         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3396                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3397         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3398                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3399         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3400                 ret = repair_inode_isize(trans, root, &path, rec);
3401         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3402                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3403         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3404                 ret = repair_inode_nlinks(trans, root, &path, rec);
3405         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3406                 ret = repair_inode_nbytes(trans, root, &path, rec);
3407         btrfs_commit_transaction(trans, root);
3408         btrfs_release_path(&path);
3409         return ret;
3410 }
3411
3412 static int check_inode_recs(struct btrfs_root *root,
3413                             struct cache_tree *inode_cache)
3414 {
3415         struct cache_extent *cache;
3416         struct ptr_node *node;
3417         struct inode_record *rec;
3418         struct inode_backref *backref;
3419         int stage = 0;
3420         int ret = 0;
3421         int err = 0;
3422         u64 error = 0;
3423         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3424
3425         if (btrfs_root_refs(&root->root_item) == 0) {
3426                 if (!cache_tree_empty(inode_cache))
3427                         fprintf(stderr, "warning line %d\n", __LINE__);
3428                 return 0;
3429         }
3430
3431         /*
3432          * We need to repair backrefs first because we could change some of the
3433          * errors in the inode recs.
3434          *
3435          * We also need to go through and delete invalid backrefs first and then
3436          * add the correct ones second.  We do this because we may get EEXIST
3437          * when adding back the correct index because we hadn't yet deleted the
3438          * invalid index.
3439          *
3440          * For example, if we were missing a dir index then the directories
3441          * isize would be wrong, so if we fixed the isize to what we thought it
3442          * would be and then fixed the backref we'd still have a invalid fs, so
3443          * we need to add back the dir index and then check to see if the isize
3444          * is still wrong.
3445          */
3446         while (stage < 3) {
3447                 stage++;
3448                 if (stage == 3 && !err)
3449                         break;
3450
3451                 cache = search_cache_extent(inode_cache, 0);
3452                 while (repair && cache) {
3453                         node = container_of(cache, struct ptr_node, cache);
3454                         rec = node->data;
3455                         cache = next_cache_extent(cache);
3456
3457                         /* Need to free everything up and rescan */
3458                         if (stage == 3) {
3459                                 remove_cache_extent(inode_cache, &node->cache);
3460                                 free(node);
3461                                 free_inode_rec(rec);
3462                                 continue;
3463                         }
3464
3465                         if (list_empty(&rec->backrefs))
3466                                 continue;
3467
3468                         ret = repair_inode_backrefs(root, rec, inode_cache,
3469                                                     stage == 1);
3470                         if (ret < 0) {
3471                                 err = ret;
3472                                 stage = 2;
3473                                 break;
3474                         } if (ret > 0) {
3475                                 err = -EAGAIN;
3476                         }
3477                 }
3478         }
3479         if (err)
3480                 return err;
3481
3482         rec = get_inode_rec(inode_cache, root_dirid, 0);
3483         BUG_ON(IS_ERR(rec));
3484         if (rec) {
3485                 ret = check_root_dir(rec);
3486                 if (ret) {
3487                         fprintf(stderr, "root %llu root dir %llu error\n",
3488                                 (unsigned long long)root->root_key.objectid,
3489                                 (unsigned long long)root_dirid);
3490                         print_inode_error(root, rec);
3491                         error++;
3492                 }
3493         } else {
3494                 if (repair) {
3495                         struct btrfs_trans_handle *trans;
3496
3497                         trans = btrfs_start_transaction(root, 1);
3498                         if (IS_ERR(trans)) {
3499                                 err = PTR_ERR(trans);
3500                                 return err;
3501                         }
3502
3503                         fprintf(stderr,
3504                                 "root %llu missing its root dir, recreating\n",
3505                                 (unsigned long long)root->objectid);
3506
3507                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3508                         BUG_ON(ret);
3509
3510                         btrfs_commit_transaction(trans, root);
3511                         return -EAGAIN;
3512                 }
3513
3514                 fprintf(stderr, "root %llu root dir %llu not found\n",
3515                         (unsigned long long)root->root_key.objectid,
3516                         (unsigned long long)root_dirid);
3517         }
3518
3519         while (1) {
3520                 cache = search_cache_extent(inode_cache, 0);
3521                 if (!cache)
3522                         break;
3523                 node = container_of(cache, struct ptr_node, cache);
3524                 rec = node->data;
3525                 remove_cache_extent(inode_cache, &node->cache);
3526                 free(node);
3527                 if (rec->ino == root_dirid ||
3528                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3529                         free_inode_rec(rec);
3530                         continue;
3531                 }
3532
3533                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3534                         ret = check_orphan_item(root, rec->ino);
3535                         if (ret == 0)
3536                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3537                         if (can_free_inode_rec(rec)) {
3538                                 free_inode_rec(rec);
3539                                 continue;
3540                         }
3541                 }
3542
3543                 if (!rec->found_inode_item)
3544                         rec->errors |= I_ERR_NO_INODE_ITEM;
3545                 if (rec->found_link != rec->nlink)
3546                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3547                 if (repair) {
3548                         ret = try_repair_inode(root, rec);
3549                         if (ret == 0 && can_free_inode_rec(rec)) {
3550                                 free_inode_rec(rec);
3551                                 continue;
3552                         }
3553                         ret = 0;
3554                 }
3555
3556                 if (!(repair && ret == 0))
3557                         error++;
3558                 print_inode_error(root, rec);
3559                 list_for_each_entry(backref, &rec->backrefs, list) {
3560                         if (!backref->found_dir_item)
3561                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3562                         if (!backref->found_dir_index)
3563                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3564                         if (!backref->found_inode_ref)
3565                                 backref->errors |= REF_ERR_NO_INODE_REF;
3566                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3567                                 " namelen %u name %s filetype %d errors %x",
3568                                 (unsigned long long)backref->dir,
3569                                 (unsigned long long)backref->index,
3570                                 backref->namelen, backref->name,
3571                                 backref->filetype, backref->errors);
3572                         print_ref_error(backref->errors);
3573                 }
3574                 free_inode_rec(rec);
3575         }
3576         return (error > 0) ? -1 : 0;
3577 }
3578
3579 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3580                                         u64 objectid)
3581 {
3582         struct cache_extent *cache;
3583         struct root_record *rec = NULL;
3584         int ret;
3585
3586         cache = lookup_cache_extent(root_cache, objectid, 1);
3587         if (cache) {
3588                 rec = container_of(cache, struct root_record, cache);
3589         } else {
3590                 rec = calloc(1, sizeof(*rec));
3591                 if (!rec)
3592                         return ERR_PTR(-ENOMEM);
3593                 rec->objectid = objectid;
3594                 INIT_LIST_HEAD(&rec->backrefs);
3595                 rec->cache.start = objectid;
3596                 rec->cache.size = 1;
3597
3598                 ret = insert_cache_extent(root_cache, &rec->cache);
3599                 if (ret)
3600                         return ERR_PTR(-EEXIST);
3601         }
3602         return rec;
3603 }
3604
3605 static struct root_backref *get_root_backref(struct root_record *rec,
3606                                              u64 ref_root, u64 dir, u64 index,
3607                                              const char *name, int namelen)
3608 {
3609         struct root_backref *backref;
3610
3611         list_for_each_entry(backref, &rec->backrefs, list) {
3612                 if (backref->ref_root != ref_root || backref->dir != dir ||
3613                     backref->namelen != namelen)
3614                         continue;
3615                 if (memcmp(name, backref->name, namelen))
3616                         continue;
3617                 return backref;
3618         }
3619
3620         backref = calloc(1, sizeof(*backref) + namelen + 1);
3621         if (!backref)
3622                 return NULL;
3623         backref->ref_root = ref_root;
3624         backref->dir = dir;
3625         backref->index = index;
3626         backref->namelen = namelen;
3627         memcpy(backref->name, name, namelen);
3628         backref->name[namelen] = '\0';
3629         list_add_tail(&backref->list, &rec->backrefs);
3630         return backref;
3631 }
3632
3633 static void free_root_record(struct cache_extent *cache)
3634 {
3635         struct root_record *rec;
3636         struct root_backref *backref;
3637
3638         rec = container_of(cache, struct root_record, cache);
3639         while (!list_empty(&rec->backrefs)) {
3640                 backref = to_root_backref(rec->backrefs.next);
3641                 list_del(&backref->list);
3642                 free(backref);
3643         }
3644
3645         free(rec);
3646 }
3647
3648 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3649
3650 static int add_root_backref(struct cache_tree *root_cache,
3651                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3652                             const char *name, int namelen,
3653                             int item_type, int errors)
3654 {
3655         struct root_record *rec;
3656         struct root_backref *backref;
3657
3658         rec = get_root_rec(root_cache, root_id);
3659         BUG_ON(IS_ERR(rec));
3660         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3661         BUG_ON(!backref);
3662
3663         backref->errors |= errors;
3664
3665         if (item_type != BTRFS_DIR_ITEM_KEY) {
3666                 if (backref->found_dir_index || backref->found_back_ref ||
3667                     backref->found_forward_ref) {
3668                         if (backref->index != index)
3669                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3670                 } else {
3671                         backref->index = index;
3672                 }
3673         }
3674
3675         if (item_type == BTRFS_DIR_ITEM_KEY) {
3676                 if (backref->found_forward_ref)
3677                         rec->found_ref++;
3678                 backref->found_dir_item = 1;
3679         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3680                 backref->found_dir_index = 1;
3681         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3682                 if (backref->found_forward_ref)
3683                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3684                 else if (backref->found_dir_item)
3685                         rec->found_ref++;
3686                 backref->found_forward_ref = 1;
3687         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3688                 if (backref->found_back_ref)
3689                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3690                 backref->found_back_ref = 1;
3691         } else {
3692                 BUG_ON(1);
3693         }
3694
3695         if (backref->found_forward_ref && backref->found_dir_item)
3696                 backref->reachable = 1;
3697         return 0;
3698 }
3699
3700 static int merge_root_recs(struct btrfs_root *root,
3701                            struct cache_tree *src_cache,
3702                            struct cache_tree *dst_cache)
3703 {
3704         struct cache_extent *cache;
3705         struct ptr_node *node;
3706         struct inode_record *rec;
3707         struct inode_backref *backref;
3708         int ret = 0;
3709
3710         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3711                 free_inode_recs_tree(src_cache);
3712                 return 0;
3713         }
3714
3715         while (1) {
3716                 cache = search_cache_extent(src_cache, 0);
3717                 if (!cache)
3718                         break;
3719                 node = container_of(cache, struct ptr_node, cache);
3720                 rec = node->data;
3721                 remove_cache_extent(src_cache, &node->cache);
3722                 free(node);
3723
3724                 ret = is_child_root(root, root->objectid, rec->ino);
3725                 if (ret < 0)
3726                         break;
3727                 else if (ret == 0)
3728                         goto skip;
3729
3730                 list_for_each_entry(backref, &rec->backrefs, list) {
3731                         BUG_ON(backref->found_inode_ref);
3732                         if (backref->found_dir_item)
3733                                 add_root_backref(dst_cache, rec->ino,
3734                                         root->root_key.objectid, backref->dir,
3735                                         backref->index, backref->name,
3736                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3737                                         backref->errors);
3738                         if (backref->found_dir_index)
3739                                 add_root_backref(dst_cache, rec->ino,
3740                                         root->root_key.objectid, backref->dir,
3741                                         backref->index, backref->name,
3742                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3743                                         backref->errors);
3744                 }
3745 skip:
3746                 free_inode_rec(rec);
3747         }
3748         if (ret < 0)
3749                 return ret;
3750         return 0;
3751 }
3752
3753 static int check_root_refs(struct btrfs_root *root,
3754                            struct cache_tree *root_cache)
3755 {
3756         struct root_record *rec;
3757         struct root_record *ref_root;
3758         struct root_backref *backref;
3759         struct cache_extent *cache;
3760         int loop = 1;
3761         int ret;
3762         int error;
3763         int errors = 0;
3764
3765         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3766         BUG_ON(IS_ERR(rec));
3767         rec->found_ref = 1;
3768
3769         /* fixme: this can not detect circular references */
3770         while (loop) {
3771                 loop = 0;
3772                 cache = search_cache_extent(root_cache, 0);
3773                 while (1) {
3774                         if (!cache)
3775                                 break;
3776                         rec = container_of(cache, struct root_record, cache);
3777                         cache = next_cache_extent(cache);
3778
3779                         if (rec->found_ref == 0)
3780                                 continue;
3781
3782                         list_for_each_entry(backref, &rec->backrefs, list) {
3783                                 if (!backref->reachable)
3784                                         continue;
3785
3786                                 ref_root = get_root_rec(root_cache,
3787                                                         backref->ref_root);
3788                                 BUG_ON(IS_ERR(ref_root));
3789                                 if (ref_root->found_ref > 0)
3790                                         continue;
3791
3792                                 backref->reachable = 0;
3793                                 rec->found_ref--;
3794                                 if (rec->found_ref == 0)
3795                                         loop = 1;
3796                         }
3797                 }
3798         }
3799
3800         cache = search_cache_extent(root_cache, 0);
3801         while (1) {
3802                 if (!cache)
3803                         break;
3804                 rec = container_of(cache, struct root_record, cache);
3805                 cache = next_cache_extent(cache);
3806
3807                 if (rec->found_ref == 0 &&
3808                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3809                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3810                         ret = check_orphan_item(root->fs_info->tree_root,
3811                                                 rec->objectid);
3812                         if (ret == 0)
3813                                 continue;
3814
3815                         /*
3816                          * If we don't have a root item then we likely just have
3817                          * a dir item in a snapshot for this root but no actual
3818                          * ref key or anything so it's meaningless.
3819                          */
3820                         if (!rec->found_root_item)
3821                                 continue;
3822                         errors++;
3823                         fprintf(stderr, "fs tree %llu not referenced\n",
3824                                 (unsigned long long)rec->objectid);
3825                 }
3826
3827                 error = 0;
3828                 if (rec->found_ref > 0 && !rec->found_root_item)
3829                         error = 1;
3830                 list_for_each_entry(backref, &rec->backrefs, list) {
3831                         if (!backref->found_dir_item)
3832                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3833                         if (!backref->found_dir_index)
3834                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3835                         if (!backref->found_back_ref)
3836                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3837                         if (!backref->found_forward_ref)
3838                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3839                         if (backref->reachable && backref->errors)
3840                                 error = 1;
3841                 }
3842                 if (!error)
3843                         continue;
3844
3845                 errors++;
3846                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3847                         (unsigned long long)rec->objectid, rec->found_ref,
3848                          rec->found_root_item ? "" : "not found");
3849
3850                 list_for_each_entry(backref, &rec->backrefs, list) {
3851                         if (!backref->reachable)
3852                                 continue;
3853                         if (!backref->errors && rec->found_root_item)
3854                                 continue;
3855                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3856                                 " index %llu namelen %u name %s errors %x\n",
3857                                 (unsigned long long)backref->ref_root,
3858                                 (unsigned long long)backref->dir,
3859                                 (unsigned long long)backref->index,
3860                                 backref->namelen, backref->name,
3861                                 backref->errors);
3862                         print_ref_error(backref->errors);
3863                 }
3864         }
3865         return errors > 0 ? 1 : 0;
3866 }
3867
3868 static int process_root_ref(struct extent_buffer *eb, int slot,
3869                             struct btrfs_key *key,
3870                             struct cache_tree *root_cache)
3871 {
3872         u64 dirid;
3873         u64 index;
3874         u32 len;
3875         u32 name_len;
3876         struct btrfs_root_ref *ref;
3877         char namebuf[BTRFS_NAME_LEN];
3878         int error;
3879
3880         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3881
3882         dirid = btrfs_root_ref_dirid(eb, ref);
3883         index = btrfs_root_ref_sequence(eb, ref);
3884         name_len = btrfs_root_ref_name_len(eb, ref);
3885
3886         if (name_len <= BTRFS_NAME_LEN) {
3887                 len = name_len;
3888                 error = 0;
3889         } else {
3890                 len = BTRFS_NAME_LEN;
3891                 error = REF_ERR_NAME_TOO_LONG;
3892         }
3893         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3894
3895         if (key->type == BTRFS_ROOT_REF_KEY) {
3896                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3897                                  index, namebuf, len, key->type, error);
3898         } else {
3899                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3900                                  index, namebuf, len, key->type, error);
3901         }
3902         return 0;
3903 }
3904
3905 static void free_corrupt_block(struct cache_extent *cache)
3906 {
3907         struct btrfs_corrupt_block *corrupt;
3908
3909         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3910         free(corrupt);
3911 }
3912
3913 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3914
3915 /*
3916  * Repair the btree of the given root.
3917  *
3918  * The fix is to remove the node key in corrupt_blocks cache_tree.
3919  * and rebalance the tree.
3920  * After the fix, the btree should be writeable.
3921  */
3922 static int repair_btree(struct btrfs_root *root,
3923                         struct cache_tree *corrupt_blocks)
3924 {
3925         struct btrfs_trans_handle *trans;
3926         struct btrfs_path path;
3927         struct btrfs_corrupt_block *corrupt;
3928         struct cache_extent *cache;
3929         struct btrfs_key key;
3930         u64 offset;
3931         int level;
3932         int ret = 0;
3933
3934         if (cache_tree_empty(corrupt_blocks))
3935                 return 0;
3936
3937         trans = btrfs_start_transaction(root, 1);
3938         if (IS_ERR(trans)) {
3939                 ret = PTR_ERR(trans);
3940                 fprintf(stderr, "Error starting transaction: %s\n",
3941                         strerror(-ret));
3942                 return ret;
3943         }
3944         btrfs_init_path(&path);
3945         cache = first_cache_extent(corrupt_blocks);
3946         while (cache) {
3947                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3948                                        cache);
3949                 level = corrupt->level;
3950                 path.lowest_level = level;
3951                 key.objectid = corrupt->key.objectid;
3952                 key.type = corrupt->key.type;
3953                 key.offset = corrupt->key.offset;
3954
3955                 /*
3956                  * Here we don't want to do any tree balance, since it may
3957                  * cause a balance with corrupted brother leaf/node,
3958                  * so ins_len set to 0 here.
3959                  * Balance will be done after all corrupt node/leaf is deleted.
3960                  */
3961                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3962                 if (ret < 0)
3963                         goto out;
3964                 offset = btrfs_node_blockptr(path.nodes[level],
3965                                              path.slots[level]);
3966
3967                 /* Remove the ptr */
3968                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3969                 if (ret < 0)
3970                         goto out;
3971                 /*
3972                  * Remove the corresponding extent
3973                  * return value is not concerned.
3974                  */
3975                 btrfs_release_path(&path);
3976                 ret = btrfs_free_extent(trans, root, offset,
3977                                 root->fs_info->nodesize, 0,
3978                                 root->root_key.objectid, level - 1, 0);
3979                 cache = next_cache_extent(cache);
3980         }
3981
3982         /* Balance the btree using btrfs_search_slot() */
3983         cache = first_cache_extent(corrupt_blocks);
3984         while (cache) {
3985                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3986                                        cache);
3987                 memcpy(&key, &corrupt->key, sizeof(key));
3988                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3989                 if (ret < 0)
3990                         goto out;
3991                 /* return will always >0 since it won't find the item */
3992                 ret = 0;
3993                 btrfs_release_path(&path);
3994                 cache = next_cache_extent(cache);
3995         }
3996 out:
3997         btrfs_commit_transaction(trans, root);
3998         btrfs_release_path(&path);
3999         return ret;
4000 }
4001
4002 static int check_fs_root(struct btrfs_root *root,
4003                          struct cache_tree *root_cache,
4004                          struct walk_control *wc)
4005 {
4006         int ret = 0;
4007         int err = 0;
4008         int wret;
4009         int level;
4010         struct btrfs_path path;
4011         struct shared_node root_node;
4012         struct root_record *rec;
4013         struct btrfs_root_item *root_item = &root->root_item;
4014         struct cache_tree corrupt_blocks;
4015         struct orphan_data_extent *orphan;
4016         struct orphan_data_extent *tmp;
4017         enum btrfs_tree_block_status status;
4018         struct node_refs nrefs;
4019
4020         /*
4021          * Reuse the corrupt_block cache tree to record corrupted tree block
4022          *
4023          * Unlike the usage in extent tree check, here we do it in a per
4024          * fs/subvol tree base.
4025          */
4026         cache_tree_init(&corrupt_blocks);
4027         root->fs_info->corrupt_blocks = &corrupt_blocks;
4028
4029         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4030                 rec = get_root_rec(root_cache, root->root_key.objectid);
4031                 BUG_ON(IS_ERR(rec));
4032                 if (btrfs_root_refs(root_item) > 0)
4033                         rec->found_root_item = 1;
4034         }
4035
4036         btrfs_init_path(&path);
4037         memset(&root_node, 0, sizeof(root_node));
4038         cache_tree_init(&root_node.root_cache);
4039         cache_tree_init(&root_node.inode_cache);
4040         memset(&nrefs, 0, sizeof(nrefs));
4041
4042         /* Move the orphan extent record to corresponding inode_record */
4043         list_for_each_entry_safe(orphan, tmp,
4044                                  &root->orphan_data_extents, list) {
4045                 struct inode_record *inode;
4046
4047                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4048                                       1);
4049                 BUG_ON(IS_ERR(inode));
4050                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4051                 list_move(&orphan->list, &inode->orphan_extents);
4052         }
4053
4054         level = btrfs_header_level(root->node);
4055         memset(wc->nodes, 0, sizeof(wc->nodes));
4056         wc->nodes[level] = &root_node;
4057         wc->active_node = level;
4058         wc->root_level = level;
4059
4060         /* We may not have checked the root block, lets do that now */
4061         if (btrfs_is_leaf(root->node))
4062                 status = btrfs_check_leaf(root, NULL, root->node);
4063         else
4064                 status = btrfs_check_node(root, NULL, root->node);
4065         if (status != BTRFS_TREE_BLOCK_CLEAN)
4066                 return -EIO;
4067
4068         if (btrfs_root_refs(root_item) > 0 ||
4069             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4070                 path.nodes[level] = root->node;
4071                 extent_buffer_get(root->node);
4072                 path.slots[level] = 0;
4073         } else {
4074                 struct btrfs_key key;
4075                 struct btrfs_disk_key found_key;
4076
4077                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4078                 level = root_item->drop_level;
4079                 path.lowest_level = level;
4080                 if (level > btrfs_header_level(root->node) ||
4081                     level >= BTRFS_MAX_LEVEL) {
4082                         error("ignoring invalid drop level: %u", level);
4083                         goto skip_walking;
4084                 }
4085                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4086                 if (wret < 0)
4087                         goto skip_walking;
4088                 btrfs_node_key(path.nodes[level], &found_key,
4089                                 path.slots[level]);
4090                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4091                                         sizeof(found_key)));
4092         }
4093
4094         while (1) {
4095                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4096                 if (wret < 0)
4097                         ret = wret;
4098                 if (wret != 0)
4099                         break;
4100
4101                 wret = walk_up_tree(root, &path, wc, &level);
4102                 if (wret < 0)
4103                         ret = wret;
4104                 if (wret != 0)
4105                         break;
4106         }
4107 skip_walking:
4108         btrfs_release_path(&path);
4109
4110         if (!cache_tree_empty(&corrupt_blocks)) {
4111                 struct cache_extent *cache;
4112                 struct btrfs_corrupt_block *corrupt;
4113
4114                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4115                        root->root_key.objectid);
4116                 cache = first_cache_extent(&corrupt_blocks);
4117                 while (cache) {
4118                         corrupt = container_of(cache,
4119                                                struct btrfs_corrupt_block,
4120                                                cache);
4121                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4122                                cache->start, corrupt->level,
4123                                corrupt->key.objectid, corrupt->key.type,
4124                                corrupt->key.offset);
4125                         cache = next_cache_extent(cache);
4126                 }
4127                 if (repair) {
4128                         printf("Try to repair the btree for root %llu\n",
4129                                root->root_key.objectid);
4130                         ret = repair_btree(root, &corrupt_blocks);
4131                         if (ret < 0)
4132                                 fprintf(stderr, "Failed to repair btree: %s\n",
4133                                         strerror(-ret));
4134                         if (!ret)
4135                                 printf("Btree for root %llu is fixed\n",
4136                                        root->root_key.objectid);
4137                 }
4138         }
4139
4140         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4141         if (err < 0)
4142                 ret = err;
4143
4144         if (root_node.current) {
4145                 root_node.current->checked = 1;
4146                 maybe_free_inode_rec(&root_node.inode_cache,
4147                                 root_node.current);
4148         }
4149
4150         err = check_inode_recs(root, &root_node.inode_cache);
4151         if (!ret)
4152                 ret = err;
4153
4154         free_corrupt_blocks_tree(&corrupt_blocks);
4155         root->fs_info->corrupt_blocks = NULL;
4156         free_orphan_data_extents(&root->orphan_data_extents);
4157         return ret;
4158 }
4159
4160 static int fs_root_objectid(u64 objectid)
4161 {
4162         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4163             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4164                 return 1;
4165         return is_fstree(objectid);
4166 }
4167
4168 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4169                           struct cache_tree *root_cache)
4170 {
4171         struct btrfs_path path;
4172         struct btrfs_key key;
4173         struct walk_control wc;
4174         struct extent_buffer *leaf, *tree_node;
4175         struct btrfs_root *tmp_root;
4176         struct btrfs_root *tree_root = fs_info->tree_root;
4177         int ret;
4178         int err = 0;
4179
4180         if (ctx.progress_enabled) {
4181                 ctx.tp = TASK_FS_ROOTS;
4182                 task_start(ctx.info);
4183         }
4184
4185         /*
4186          * Just in case we made any changes to the extent tree that weren't
4187          * reflected into the free space cache yet.
4188          */
4189         if (repair)
4190                 reset_cached_block_groups(fs_info);
4191         memset(&wc, 0, sizeof(wc));
4192         cache_tree_init(&wc.shared);
4193         btrfs_init_path(&path);
4194
4195 again:
4196         key.offset = 0;
4197         key.objectid = 0;
4198         key.type = BTRFS_ROOT_ITEM_KEY;
4199         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4200         if (ret < 0) {
4201                 err = 1;
4202                 goto out;
4203         }
4204         tree_node = tree_root->node;
4205         while (1) {
4206                 if (tree_node != tree_root->node) {
4207                         free_root_recs_tree(root_cache);
4208                         btrfs_release_path(&path);
4209                         goto again;
4210                 }
4211                 leaf = path.nodes[0];
4212                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4213                         ret = btrfs_next_leaf(tree_root, &path);
4214                         if (ret) {
4215                                 if (ret < 0)
4216                                         err = 1;
4217                                 break;
4218                         }
4219                         leaf = path.nodes[0];
4220                 }
4221                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4222                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4223                     fs_root_objectid(key.objectid)) {
4224                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4225                                 tmp_root = btrfs_read_fs_root_no_cache(
4226                                                 fs_info, &key);
4227                         } else {
4228                                 key.offset = (u64)-1;
4229                                 tmp_root = btrfs_read_fs_root(
4230                                                 fs_info, &key);
4231                         }
4232                         if (IS_ERR(tmp_root)) {
4233                                 err = 1;
4234                                 goto next;
4235                         }
4236                         ret = check_fs_root(tmp_root, root_cache, &wc);
4237                         if (ret == -EAGAIN) {
4238                                 free_root_recs_tree(root_cache);
4239                                 btrfs_release_path(&path);
4240                                 goto again;
4241                         }
4242                         if (ret)
4243                                 err = 1;
4244                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4245                                 btrfs_free_fs_root(tmp_root);
4246                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4247                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4248                         process_root_ref(leaf, path.slots[0], &key,
4249                                          root_cache);
4250                 }
4251 next:
4252                 path.slots[0]++;
4253         }
4254 out:
4255         btrfs_release_path(&path);
4256         if (err)
4257                 free_extent_cache_tree(&wc.shared);
4258         if (!cache_tree_empty(&wc.shared))
4259                 fprintf(stderr, "warning line %d\n", __LINE__);
4260
4261         task_stop(ctx.info);
4262
4263         return err;
4264 }
4265
4266 /*
4267  * Find the @index according by @ino and name.
4268  * Notice:time efficiency is O(N)
4269  *
4270  * @root:       the root of the fs/file tree
4271  * @index_ret:  the index as return value
4272  * @namebuf:    the name to match
4273  * @name_len:   the length of name to match
4274  * @file_type:  the file_type of INODE_ITEM to match
4275  *
4276  * Returns 0 if found and *@index_ret will be modified with right value
4277  * Returns< 0 not found and *@index_ret will be (u64)-1
4278  */
4279 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4280                           u64 *index_ret, char *namebuf, u32 name_len,
4281                           u8 file_type)
4282 {
4283         struct btrfs_path path;
4284         struct extent_buffer *node;
4285         struct btrfs_dir_item *di;
4286         struct btrfs_key key;
4287         struct btrfs_key location;
4288         char name[BTRFS_NAME_LEN] = {0};
4289
4290         u32 total;
4291         u32 cur = 0;
4292         u32 len;
4293         u32 data_len;
4294         u8 filetype;
4295         int slot;
4296         int ret;
4297
4298         ASSERT(index_ret);
4299
4300         /* search from the last index */
4301         key.objectid = dirid;
4302         key.offset = (u64)-1;
4303         key.type = BTRFS_DIR_INDEX_KEY;
4304
4305         btrfs_init_path(&path);
4306         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4307         if (ret < 0)
4308                 return ret;
4309
4310 loop:
4311         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4312         if (ret) {
4313                 ret = -ENOENT;
4314                 *index_ret = (64)-1;
4315                 goto out;
4316         }
4317         /* Check whether inode_id/filetype/name match */
4318         node = path.nodes[0];
4319         slot = path.slots[0];
4320         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4321         total = btrfs_item_size_nr(node, slot);
4322         while (cur < total) {
4323                 ret = -ENOENT;
4324                 len = btrfs_dir_name_len(node, di);
4325                 data_len = btrfs_dir_data_len(node, di);
4326
4327                 btrfs_dir_item_key_to_cpu(node, di, &location);
4328                 if (location.objectid != location_id ||
4329                     location.type != BTRFS_INODE_ITEM_KEY ||
4330                     location.offset != 0)
4331                         goto next;
4332
4333                 filetype = btrfs_dir_type(node, di);
4334                 if (file_type != filetype)
4335                         goto next;
4336
4337                 if (len > BTRFS_NAME_LEN)
4338                         len = BTRFS_NAME_LEN;
4339
4340                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4341                 if (len != name_len || strncmp(namebuf, name, len))
4342                         goto next;
4343
4344                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4345                 *index_ret = key.offset;
4346                 ret = 0;
4347                 goto out;
4348 next:
4349                 len += sizeof(*di) + data_len;
4350                 di = (struct btrfs_dir_item *)((char *)di + len);
4351                 cur += len;
4352         }
4353         goto loop;
4354
4355 out:
4356         btrfs_release_path(&path);
4357         return ret;
4358 }
4359
4360 /*
4361  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4362  * INODE_REF/INODE_EXTREF match.
4363  *
4364  * @root:       the root of the fs/file tree
4365  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4366  *              value while find index
4367  * @location_key: location key of the struct btrfs_dir_item to match
4368  * @name:       the name to match
4369  * @namelen:    the length of name
4370  * @file_type:  the type of file to math
4371  *
4372  * Return 0 if no error occurred.
4373  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4374  * DIR_ITEM/DIR_INDEX
4375  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4376  * and DIR_ITEM/DIR_INDEX mismatch
4377  */
4378 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4379                          struct btrfs_key *location_key, char *name,
4380                          u32 namelen, u8 file_type)
4381 {
4382         struct btrfs_path path;
4383         struct extent_buffer *node;
4384         struct btrfs_dir_item *di;
4385         struct btrfs_key location;
4386         char namebuf[BTRFS_NAME_LEN] = {0};
4387         u32 total;
4388         u32 cur = 0;
4389         u32 len;
4390         u32 data_len;
4391         u8 filetype;
4392         int slot;
4393         int ret;
4394
4395         /* get the index by traversing all index */
4396         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4397                 ret = find_dir_index(root, key->objectid,
4398                                      location_key->objectid, &key->offset,
4399                                      name, namelen, file_type);
4400                 if (ret)
4401                         ret = DIR_INDEX_MISSING;
4402                 return ret;
4403         }
4404
4405         btrfs_init_path(&path);
4406         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4407         if (ret) {
4408                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4409                         DIR_INDEX_MISSING;
4410                 goto out;
4411         }
4412
4413         /* Check whether inode_id/filetype/name match */
4414         node = path.nodes[0];
4415         slot = path.slots[0];
4416         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4417         total = btrfs_item_size_nr(node, slot);
4418         while (cur < total) {
4419                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4420                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4421
4422                 len = btrfs_dir_name_len(node, di);
4423                 data_len = btrfs_dir_data_len(node, di);
4424
4425                 btrfs_dir_item_key_to_cpu(node, di, &location);
4426                 if (location.objectid != location_key->objectid ||
4427                     location.type != location_key->type ||
4428                     location.offset != location_key->offset)
4429                         goto next;
4430
4431                 filetype = btrfs_dir_type(node, di);
4432                 if (file_type != filetype)
4433                         goto next;
4434
4435                 if (len > BTRFS_NAME_LEN) {
4436                         len = BTRFS_NAME_LEN;
4437                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4438                         root->objectid,
4439                         key->type == BTRFS_DIR_ITEM_KEY ?
4440                         "DIR_ITEM" : "DIR_INDEX",
4441                         key->objectid, key->offset, len);
4442                 }
4443                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4444                                    len);
4445                 if (len != namelen || strncmp(namebuf, name, len))
4446                         goto next;
4447
4448                 ret = 0;
4449                 goto out;
4450 next:
4451                 len += sizeof(*di) + data_len;
4452                 di = (struct btrfs_dir_item *)((char *)di + len);
4453                 cur += len;
4454         }
4455
4456 out:
4457         btrfs_release_path(&path);
4458         return ret;
4459 }
4460
4461 /*
4462  * Prints inode ref error message
4463  */
4464 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4465                                 u64 index, const char *namebuf, int name_len,
4466                                 u8 filetype, int err)
4467 {
4468         if (!err)
4469                 return;
4470
4471         /* root dir error */
4472         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4473                 error(
4474         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4475                       root->objectid, key->objectid, key->offset, namebuf);
4476                 return;
4477         }
4478
4479         /* normal error */
4480         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4481                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4482                       root->objectid, key->offset,
4483                       btrfs_name_hash(namebuf, name_len),
4484                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4485                       namebuf, filetype);
4486         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4487                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4488                       root->objectid, key->offset, index,
4489                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4490                       namebuf, filetype);
4491 }
4492
4493 /*
4494  * Insert the missing inode item.
4495  *
4496  * Returns 0 means success.
4497  * Returns <0 means error.
4498  */
4499 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4500                                      u8 filetype)
4501 {
4502         struct btrfs_key key;
4503         struct btrfs_trans_handle *trans;
4504         struct btrfs_path path;
4505         int ret;
4506
4507         key.objectid = ino;
4508         key.type = BTRFS_INODE_ITEM_KEY;
4509         key.offset = 0;
4510
4511         btrfs_init_path(&path);
4512         trans = btrfs_start_transaction(root, 1);
4513         if (IS_ERR(trans)) {
4514                 ret = -EIO;
4515                 goto out;
4516         }
4517
4518         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4519         if (ret < 0 || !ret)
4520                 goto fail;
4521
4522         /* insert inode item */
4523         create_inode_item_lowmem(trans, root, ino, filetype);
4524         ret = 0;
4525 fail:
4526         btrfs_commit_transaction(trans, root);
4527 out:
4528         if (ret)
4529                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4530                       root->objectid, ino);
4531         btrfs_release_path(&path);
4532         return ret;
4533 }
4534
4535 /*
4536  * Traverse the given INODE_REF and call find_dir_item() to find related
4537  * DIR_ITEM/DIR_INDEX.
4538  *
4539  * @root:       the root of the fs/file tree
4540  * @ref_key:    the key of the INODE_REF
4541  * @refs:       the count of INODE_REF
4542  * @mode:       the st_mode of INODE_ITEM
4543  *
4544  * Return 0 if no error occurred.
4545  */
4546 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4547                            struct btrfs_path *path, char *name_ret,
4548                            u32 *namelen_ret, u64 *refs, int mode)
4549 {
4550         struct btrfs_key key;
4551         struct btrfs_key location;
4552         struct btrfs_inode_ref *ref;
4553         struct extent_buffer *node;
4554         char namebuf[BTRFS_NAME_LEN] = {0};
4555         u32 total;
4556         u32 cur = 0;
4557         u32 len;
4558         u32 name_len;
4559         u64 index;
4560         int err = 0;
4561         int tmp_err;
4562         int slot;
4563
4564         location.objectid = ref_key->objectid;
4565         location.type = BTRFS_INODE_ITEM_KEY;
4566         location.offset = 0;
4567         node = path->nodes[0];
4568         slot = path->slots[0];
4569
4570         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4571         total = btrfs_item_size_nr(node, slot);
4572
4573 next:
4574         /* Update inode ref count */
4575         (*refs)++;
4576
4577         tmp_err = 0;
4578         index = btrfs_inode_ref_index(node, ref);
4579         name_len = btrfs_inode_ref_name_len(node, ref);
4580         if (cur + sizeof(*ref) + name_len > total ||
4581             name_len > BTRFS_NAME_LEN) {
4582                 warning("root %llu INODE_REF[%llu %llu] name too long",
4583                         root->objectid, ref_key->objectid, ref_key->offset);
4584
4585                 if (total < cur + sizeof(*ref))
4586                         goto out;
4587                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4588         } else {
4589                 len = name_len;
4590         }
4591
4592         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4593
4594         /* copy the fisrt name found to name_ret */
4595         if (*refs == 1 && name_ret) {
4596                 memcpy(name_ret, namebuf, len);
4597                 *namelen_ret = len;
4598         }
4599
4600         /* Check root dir ref */
4601         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4602                 if (index != 0 || len != strlen("..") ||
4603                     strncmp("..", namebuf, len) ||
4604                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4605                         /* set err bits then repair will delete the ref */
4606                         err |= DIR_INDEX_MISSING;
4607                         err |= DIR_ITEM_MISSING;
4608                 }
4609                 goto end;
4610         }
4611
4612         /* Find related DIR_INDEX */
4613         key.objectid = ref_key->offset;
4614         key.type = BTRFS_DIR_INDEX_KEY;
4615         key.offset = index;
4616         tmp_err |= find_dir_item(root, &key, &location, namebuf, len, mode);
4617
4618         /* Find related dir_item */
4619         key.objectid = ref_key->offset;
4620         key.type = BTRFS_DIR_ITEM_KEY;
4621         key.offset = btrfs_name_hash(namebuf, len);
4622         tmp_err |= find_dir_item(root, &key, &location, namebuf, len, mode);
4623
4624 end:
4625         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4626                             imode_to_type(mode), tmp_err);
4627         err |= tmp_err;
4628         len = sizeof(*ref) + name_len;
4629         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4630         cur += len;
4631         if (cur < total)
4632                 goto next;
4633
4634 out:
4635         return err;
4636 }
4637
4638 /*
4639  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4640  * DIR_ITEM/DIR_INDEX.
4641  *
4642  * @root:       the root of the fs/file tree
4643  * @ref_key:    the key of the INODE_EXTREF
4644  * @refs:       the count of INODE_EXTREF
4645  * @mode:       the st_mode of INODE_ITEM
4646  *
4647  * Return 0 if no error occurred.
4648  */
4649 static int check_inode_extref(struct btrfs_root *root,
4650                               struct btrfs_key *ref_key,
4651                               struct extent_buffer *node, int slot, u64 *refs,
4652                               int mode)
4653 {
4654         struct btrfs_key key;
4655         struct btrfs_key location;
4656         struct btrfs_inode_extref *extref;
4657         char namebuf[BTRFS_NAME_LEN] = {0};
4658         u32 total;
4659         u32 cur = 0;
4660         u32 len;
4661         u32 name_len;
4662         u64 index;
4663         u64 parent;
4664         int ret;
4665         int err = 0;
4666
4667         location.objectid = ref_key->objectid;
4668         location.type = BTRFS_INODE_ITEM_KEY;
4669         location.offset = 0;
4670
4671         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4672         total = btrfs_item_size_nr(node, slot);
4673
4674 next:
4675         /* update inode ref count */
4676         (*refs)++;
4677         name_len = btrfs_inode_extref_name_len(node, extref);
4678         index = btrfs_inode_extref_index(node, extref);
4679         parent = btrfs_inode_extref_parent(node, extref);
4680         if (name_len <= BTRFS_NAME_LEN) {
4681                 len = name_len;
4682         } else {
4683                 len = BTRFS_NAME_LEN;
4684                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4685                         root->objectid, ref_key->objectid, ref_key->offset);
4686         }
4687         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4688
4689         /* Check root dir ref name */
4690         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4691                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4692                       root->objectid, ref_key->objectid, ref_key->offset,
4693                       namebuf);
4694                 err |= ROOT_DIR_ERROR;
4695         }
4696
4697         /* find related dir_index */
4698         key.objectid = parent;
4699         key.type = BTRFS_DIR_INDEX_KEY;
4700         key.offset = index;
4701         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4702         err |= ret;
4703
4704         /* find related dir_item */
4705         key.objectid = parent;
4706         key.type = BTRFS_DIR_ITEM_KEY;
4707         key.offset = btrfs_name_hash(namebuf, len);
4708         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4709         err |= ret;
4710
4711         len = sizeof(*extref) + name_len;
4712         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4713         cur += len;
4714
4715         if (cur < total)
4716                 goto next;
4717
4718         return err;
4719 }
4720
4721 /*
4722  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4723  * DIR_ITEM/DIR_INDEX match.
4724  * Return with @index_ret.
4725  *
4726  * @root:       the root of the fs/file tree
4727  * @key:        the key of the INODE_REF/INODE_EXTREF
4728  * @name:       the name in the INODE_REF/INODE_EXTREF
4729  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4730  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4731  *              value (64)-1 means do not check index
4732  * @ext_ref:    the EXTENDED_IREF feature
4733  *
4734  * Return 0 if no error occurred.
4735  * Return >0 for error bitmap
4736  */
4737 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4738                           char *name, int namelen, u64 *index_ret,
4739                           unsigned int ext_ref)
4740 {
4741         struct btrfs_path path;
4742         struct btrfs_inode_ref *ref;
4743         struct btrfs_inode_extref *extref;
4744         struct extent_buffer *node;
4745         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4746         u32 total;
4747         u32 cur = 0;
4748         u32 len;
4749         u32 ref_namelen;
4750         u64 ref_index;
4751         u64 parent;
4752         u64 dir_id;
4753         int slot;
4754         int ret;
4755
4756         ASSERT(index_ret);
4757
4758         btrfs_init_path(&path);
4759         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4760         if (ret) {
4761                 ret = INODE_REF_MISSING;
4762                 goto extref;
4763         }
4764
4765         node = path.nodes[0];
4766         slot = path.slots[0];
4767
4768         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4769         total = btrfs_item_size_nr(node, slot);
4770
4771         /* Iterate all entry of INODE_REF */
4772         while (cur < total) {
4773                 ret = INODE_REF_MISSING;
4774
4775                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4776                 ref_index = btrfs_inode_ref_index(node, ref);
4777                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4778                         goto next_ref;
4779
4780                 if (cur + sizeof(*ref) + ref_namelen > total ||
4781                     ref_namelen > BTRFS_NAME_LEN) {
4782                         warning("root %llu INODE %s[%llu %llu] name too long",
4783                                 root->objectid,
4784                                 key->type == BTRFS_INODE_REF_KEY ?
4785                                         "REF" : "EXTREF",
4786                                 key->objectid, key->offset);
4787
4788                         if (cur + sizeof(*ref) > total)
4789                                 break;
4790                         len = min_t(u32, total - cur - sizeof(*ref),
4791                                     BTRFS_NAME_LEN);
4792                 } else {
4793                         len = ref_namelen;
4794                 }
4795
4796                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4797                                    len);
4798
4799                 if (len != namelen || strncmp(ref_namebuf, name, len))
4800                         goto next_ref;
4801
4802                 *index_ret = ref_index;
4803                 ret = 0;
4804                 goto out;
4805 next_ref:
4806                 len = sizeof(*ref) + ref_namelen;
4807                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4808                 cur += len;
4809         }
4810
4811 extref:
4812         /* Skip if not support EXTENDED_IREF feature */
4813         if (!ext_ref)
4814                 goto out;
4815
4816         btrfs_release_path(&path);
4817         btrfs_init_path(&path);
4818
4819         dir_id = key->offset;
4820         key->type = BTRFS_INODE_EXTREF_KEY;
4821         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4822
4823         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4824         if (ret) {
4825                 ret = INODE_REF_MISSING;
4826                 goto out;
4827         }
4828
4829         node = path.nodes[0];
4830         slot = path.slots[0];
4831
4832         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4833         cur = 0;
4834         total = btrfs_item_size_nr(node, slot);
4835
4836         /* Iterate all entry of INODE_EXTREF */
4837         while (cur < total) {
4838                 ret = INODE_REF_MISSING;
4839
4840                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4841                 ref_index = btrfs_inode_extref_index(node, extref);
4842                 parent = btrfs_inode_extref_parent(node, extref);
4843                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4844                         goto next_extref;
4845
4846                 if (parent != dir_id)
4847                         goto next_extref;
4848
4849                 if (ref_namelen <= BTRFS_NAME_LEN) {
4850                         len = ref_namelen;
4851                 } else {
4852                         len = BTRFS_NAME_LEN;
4853                         warning("root %llu INODE %s[%llu %llu] name too long",
4854                                 root->objectid,
4855                                 key->type == BTRFS_INODE_REF_KEY ?
4856                                         "REF" : "EXTREF",
4857                                 key->objectid, key->offset);
4858                 }
4859                 read_extent_buffer(node, ref_namebuf,
4860                                    (unsigned long)(extref + 1), len);
4861
4862                 if (len != namelen || strncmp(ref_namebuf, name, len))
4863                         goto next_extref;
4864
4865                 *index_ret = ref_index;
4866                 ret = 0;
4867                 goto out;
4868
4869 next_extref:
4870                 len = sizeof(*extref) + ref_namelen;
4871                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4872                 cur += len;
4873
4874         }
4875 out:
4876         btrfs_release_path(&path);
4877         return ret;
4878 }
4879
4880 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4881                                u64 ino, u64 index, const char *namebuf,
4882                                int name_len, u8 filetype, int err)
4883 {
4884         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4885                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4886                       root->objectid, key->objectid, key->offset, namebuf,
4887                       filetype,
4888                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4889         }
4890
4891         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4892                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4893                       root->objectid, key->objectid, index, namebuf, filetype,
4894                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4895         }
4896
4897         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4898                 error(
4899                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4900                       root->objectid, ino, index, namebuf, filetype,
4901                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
4902         }
4903
4904         if (err & INODE_REF_MISSING)
4905                 error(
4906                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
4907                       root->objectid, ino, key->objectid, namebuf, filetype);
4908
4909 }
4910
4911 /*
4912  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4913  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4914  *
4915  * @root:       the root of the fs/file tree
4916  * @key:        the key of the INODE_REF/INODE_EXTREF
4917  * @path:       the path
4918  * @size:       the st_size of the INODE_ITEM
4919  * @ext_ref:    the EXTENDED_IREF feature
4920  *
4921  * Return 0 if no error occurred.
4922  */
4923 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
4924                           struct btrfs_path *path, u64 *size,
4925                           unsigned int ext_ref)
4926 {
4927         struct btrfs_dir_item *di;
4928         struct btrfs_inode_item *ii;
4929         struct btrfs_key key;
4930         struct btrfs_key location;
4931         struct extent_buffer *node;
4932         int slot;
4933         char namebuf[BTRFS_NAME_LEN] = {0};
4934         u32 total;
4935         u32 cur = 0;
4936         u32 len;
4937         u32 name_len;
4938         u32 data_len;
4939         u8 filetype;
4940         u32 mode;
4941         u64 index;
4942         int ret;
4943         int err = 0;
4944         int tmp_err;
4945
4946         node = path->nodes[0];
4947         slot = path->slots[0];
4948         /*
4949          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4950          * ignore index check.
4951          */
4952         if (di_key->type == BTRFS_DIR_INDEX_KEY)
4953                 index = di_key->offset;
4954         else
4955                 index = (u64)-1;
4956
4957         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4958         total = btrfs_item_size_nr(node, slot);
4959         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4960
4961         while (cur < total) {
4962                 data_len = btrfs_dir_data_len(node, di);
4963                 tmp_err = 0;
4964                 if (data_len)
4965                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4966                               root->objectid,
4967               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
4968                               di_key->objectid, di_key->offset, data_len);
4969
4970                 name_len = btrfs_dir_name_len(node, di);
4971                 if (name_len <= BTRFS_NAME_LEN) {
4972                         len = name_len;
4973                 } else {
4974                         len = BTRFS_NAME_LEN;
4975                         warning("root %llu %s[%llu %llu] name too long",
4976                                 root->objectid,
4977                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
4978                                 di_key->objectid, di_key->offset);
4979                 }
4980                 (*size) += name_len;
4981                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4982                                    len);
4983                 filetype = btrfs_dir_type(node, di);
4984
4985                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
4986                     di_key->offset != btrfs_name_hash(namebuf, len)) {
4987                         err |= -EIO;
4988                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4989                         root->objectid, di_key->objectid, di_key->offset,
4990                         namebuf, len, filetype, di_key->offset,
4991                         btrfs_name_hash(namebuf, len));
4992                 }
4993
4994                 btrfs_dir_item_key_to_cpu(node, di, &location);
4995                 /* Ignore related ROOT_ITEM check */
4996                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4997                         goto next;
4998
4999                 btrfs_release_path(path);
5000                 /* Check relative INODE_ITEM(existence/filetype) */
5001                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5002                 if (ret) {
5003                         tmp_err |= INODE_ITEM_MISSING;
5004                         goto next;
5005                 }
5006
5007                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5008                                     struct btrfs_inode_item);
5009                 mode = btrfs_inode_mode(path->nodes[0], ii);
5010                 if (imode_to_type(mode) != filetype) {
5011                         tmp_err |= INODE_ITEM_MISMATCH;
5012                         goto next;
5013                 }
5014
5015                 /* Check relative INODE_REF/INODE_EXTREF */
5016                 key.objectid = location.objectid;
5017                 key.type = BTRFS_INODE_REF_KEY;
5018                 key.offset = di_key->objectid;
5019                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5020                                           &index, ext_ref);
5021
5022                 /* check relative INDEX/ITEM */
5023                 key.objectid = di_key->objectid;
5024                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5025                         key.type = BTRFS_DIR_INDEX_KEY;
5026                         key.offset = index;
5027                 } else {
5028                         key.type = BTRFS_DIR_ITEM_KEY;
5029                         key.offset = btrfs_name_hash(namebuf, name_len);
5030                 }
5031
5032                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5033                                          name_len, filetype);
5034                 /* find_dir_item may find index */
5035                 if (key.type == BTRFS_DIR_INDEX_KEY)
5036                         index = key.offset;
5037 next:
5038                 btrfs_release_path(path);
5039                 print_dir_item_err(root, di_key, location.objectid, index,
5040                                    namebuf, name_len, filetype, tmp_err);
5041                 err |= tmp_err;
5042                 len = sizeof(*di) + name_len + data_len;
5043                 di = (struct btrfs_dir_item *)((char *)di + len);
5044                 cur += len;
5045
5046                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5047                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5048                               root->objectid, di_key->objectid,
5049                               di_key->offset);
5050                         break;
5051                 }
5052         }
5053
5054         /* research path */
5055         btrfs_release_path(path);
5056         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5057         if (ret)
5058                 err |= ret > 0 ? -ENOENT : ret;
5059         return err;
5060 }
5061
5062 /*
5063  * Check file extent datasum/hole, update the size of the file extents,
5064  * check and update the last offset of the file extent.
5065  *
5066  * @root:       the root of fs/file tree.
5067  * @fkey:       the key of the file extent.
5068  * @nodatasum:  INODE_NODATASUM feature.
5069  * @size:       the sum of all EXTENT_DATA items size for this inode.
5070  * @end:        the offset of the last extent.
5071  *
5072  * Return 0 if no error occurred.
5073  */
5074 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5075                              struct extent_buffer *node, int slot,
5076                              unsigned int nodatasum, u64 *size, u64 *end)
5077 {
5078         struct btrfs_file_extent_item *fi;
5079         u64 disk_bytenr;
5080         u64 disk_num_bytes;
5081         u64 extent_num_bytes;
5082         u64 extent_offset;
5083         u64 csum_found;         /* In byte size, sectorsize aligned */
5084         u64 search_start;       /* Logical range start we search for csum */
5085         u64 search_len;         /* Logical range len we search for csum */
5086         unsigned int extent_type;
5087         unsigned int is_hole;
5088         int compressed = 0;
5089         int ret;
5090         int err = 0;
5091
5092         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5093
5094         /* Check inline extent */
5095         extent_type = btrfs_file_extent_type(node, fi);
5096         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5097                 struct btrfs_item *e = btrfs_item_nr(slot);
5098                 u32 item_inline_len;
5099
5100                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5101                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5102                 compressed = btrfs_file_extent_compression(node, fi);
5103                 if (extent_num_bytes == 0) {
5104                         error(
5105                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5106                                 root->objectid, fkey->objectid, fkey->offset);
5107                         err |= FILE_EXTENT_ERROR;
5108                 }
5109                 if (!compressed && extent_num_bytes != item_inline_len) {
5110                         error(
5111                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5112                                 root->objectid, fkey->objectid, fkey->offset,
5113                                 extent_num_bytes, item_inline_len);
5114                         err |= FILE_EXTENT_ERROR;
5115                 }
5116                 *end += extent_num_bytes;
5117                 *size += extent_num_bytes;
5118                 return err;
5119         }
5120
5121         /* Check extent type */
5122         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5123                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5124                 err |= FILE_EXTENT_ERROR;
5125                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5126                       root->objectid, fkey->objectid, fkey->offset);
5127                 return err;
5128         }
5129
5130         /* Check REG_EXTENT/PREALLOC_EXTENT */
5131         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5132         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5133         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5134         extent_offset = btrfs_file_extent_offset(node, fi);
5135         compressed = btrfs_file_extent_compression(node, fi);
5136         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5137
5138         /*
5139          * Check EXTENT_DATA csum
5140          *
5141          * For plain (uncompressed) extent, we should only check the range
5142          * we're referring to, as it's possible that part of prealloc extent
5143          * has been written, and has csum:
5144          *
5145          * |<--- Original large preallocated extent A ---->|
5146          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5147          *      No csum                         Has csum
5148          *
5149          * For compressed extent, we should check the whole range.
5150          */
5151         if (!compressed) {
5152                 search_start = disk_bytenr + extent_offset;
5153                 search_len = extent_num_bytes;
5154         } else {
5155                 search_start = disk_bytenr;
5156                 search_len = disk_num_bytes;
5157         }
5158         ret = count_csum_range(root, search_start, search_len, &csum_found);
5159         if (csum_found > 0 && nodatasum) {
5160                 err |= ODD_CSUM_ITEM;
5161                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5162                       root->objectid, fkey->objectid, fkey->offset);
5163         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5164                    !is_hole && (ret < 0 || csum_found < search_len)) {
5165                 err |= CSUM_ITEM_MISSING;
5166                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5167                       root->objectid, fkey->objectid, fkey->offset,
5168                       csum_found, search_len);
5169         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5170                 err |= ODD_CSUM_ITEM;
5171                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5172                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5173         }
5174
5175         /* Check EXTENT_DATA hole */
5176         if (!no_holes && *end != fkey->offset) {
5177                 err |= FILE_EXTENT_ERROR;
5178                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5179                       root->objectid, fkey->objectid, fkey->offset);
5180         }
5181
5182         *end += extent_num_bytes;
5183         if (!is_hole)
5184                 *size += extent_num_bytes;
5185
5186         return err;
5187 }
5188
5189 /*
5190  * Set inode item nbytes to @nbytes
5191  *
5192  * Returns  0     on success
5193  * Returns  != 0  on error
5194  */
5195 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5196                                       struct btrfs_path *path,
5197                                       u64 ino, u64 nbytes)
5198 {
5199         struct btrfs_trans_handle *trans;
5200         struct btrfs_inode_item *ii;
5201         struct btrfs_key key;
5202         struct btrfs_key research_key;
5203         int err = 0;
5204         int ret;
5205
5206         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5207
5208         key.objectid = ino;
5209         key.type = BTRFS_INODE_ITEM_KEY;
5210         key.offset = 0;
5211
5212         trans = btrfs_start_transaction(root, 1);
5213         if (IS_ERR(trans)) {
5214                 ret = PTR_ERR(trans);
5215                 err |= ret;
5216                 goto out;
5217         }
5218
5219         btrfs_release_path(path);
5220         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5221         if (ret > 0)
5222                 ret = -ENOENT;
5223         if (ret) {
5224                 err |= ret;
5225                 goto fail;
5226         }
5227
5228         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5229                             struct btrfs_inode_item);
5230         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5231         btrfs_mark_buffer_dirty(path->nodes[0]);
5232 fail:
5233         btrfs_commit_transaction(trans, root);
5234 out:
5235         if (ret)
5236                 error("failed to set nbytes in inode %llu root %llu",
5237                       ino, root->root_key.objectid);
5238         else
5239                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5240                        root->root_key.objectid, nbytes);
5241
5242         /* research path */
5243         btrfs_release_path(path);
5244         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5245         err |= ret;
5246
5247         return err;
5248 }
5249
5250 /*
5251  * Set directory inode isize to @isize.
5252  *
5253  * Returns 0     on success.
5254  * Returns != 0  on error.
5255  */
5256 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5257                                    struct btrfs_path *path,
5258                                    u64 ino, u64 isize)
5259 {
5260         struct btrfs_trans_handle *trans;
5261         struct btrfs_inode_item *ii;
5262         struct btrfs_key key;
5263         struct btrfs_key research_key;
5264         int ret;
5265         int err = 0;
5266
5267         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5268
5269         key.objectid = ino;
5270         key.type = BTRFS_INODE_ITEM_KEY;
5271         key.offset = 0;
5272
5273         trans = btrfs_start_transaction(root, 1);
5274         if (IS_ERR(trans)) {
5275                 ret = PTR_ERR(trans);
5276                 err |= ret;
5277                 goto out;
5278         }
5279
5280         btrfs_release_path(path);
5281         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5282         if (ret > 0)
5283                 ret = -ENOENT;
5284         if (ret) {
5285                 err |= ret;
5286                 goto fail;
5287         }
5288
5289         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5290                             struct btrfs_inode_item);
5291         btrfs_set_inode_size(path->nodes[0], ii, isize);
5292         btrfs_mark_buffer_dirty(path->nodes[0]);
5293 fail:
5294         btrfs_commit_transaction(trans, root);
5295 out:
5296         if (ret)
5297                 error("failed to set isize in inode %llu root %llu",
5298                       ino, root->root_key.objectid);
5299         else
5300                 printf("Set isize in inode %llu root %llu to %llu\n",
5301                        ino, root->root_key.objectid, isize);
5302
5303         btrfs_release_path(path);
5304         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5305         err |= ret;
5306
5307         return err;
5308 }
5309
5310 /*
5311  * Wrapper function for btrfs_add_orphan_item().
5312  *
5313  * Returns 0     on success.
5314  * Returns != 0  on error.
5315  */
5316 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5317                                            struct btrfs_path *path, u64 ino)
5318 {
5319         struct btrfs_trans_handle *trans;
5320         struct btrfs_key research_key;
5321         int ret;
5322         int err = 0;
5323
5324         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5325
5326         trans = btrfs_start_transaction(root, 1);
5327         if (IS_ERR(trans)) {
5328                 ret = PTR_ERR(trans);
5329                 err |= ret;
5330                 goto out;
5331         }
5332
5333         btrfs_release_path(path);
5334         ret = btrfs_add_orphan_item(trans, root, path, ino);
5335         err |= ret;
5336         btrfs_commit_transaction(trans, root);
5337 out:
5338         if (ret)
5339                 error("failed to add inode %llu as orphan item root %llu",
5340                       ino, root->root_key.objectid);
5341         else
5342                 printf("Added inode %llu as orphan item root %llu\n",
5343                        ino, root->root_key.objectid);
5344
5345         btrfs_release_path(path);
5346         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5347         err |= ret;
5348
5349         return err;
5350 }
5351
5352 /*
5353  * Check INODE_ITEM and related ITEMs (the same inode number)
5354  * 1. check link count
5355  * 2. check inode ref/extref
5356  * 3. check dir item/index
5357  *
5358  * @ext_ref:    the EXTENDED_IREF feature
5359  *
5360  * Return 0 if no error occurred.
5361  * Return >0 for error or hit the traversal is done(by error bitmap)
5362  */
5363 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5364                             unsigned int ext_ref)
5365 {
5366         struct extent_buffer *node;
5367         struct btrfs_inode_item *ii;
5368         struct btrfs_key key;
5369         u64 inode_id;
5370         u32 mode;
5371         u64 nlink;
5372         u64 nbytes;
5373         u64 isize;
5374         u64 size = 0;
5375         u64 refs = 0;
5376         u64 extent_end = 0;
5377         u64 extent_size = 0;
5378         unsigned int dir;
5379         unsigned int nodatasum;
5380         int slot;
5381         int ret;
5382         int err = 0;
5383         char namebuf[BTRFS_NAME_LEN] = {0};
5384         u32 name_len = 0;
5385
5386         node = path->nodes[0];
5387         slot = path->slots[0];
5388
5389         btrfs_item_key_to_cpu(node, &key, slot);
5390         inode_id = key.objectid;
5391
5392         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5393                 ret = btrfs_next_item(root, path);
5394                 if (ret > 0)
5395                         err |= LAST_ITEM;
5396                 return err;
5397         }
5398
5399         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5400         isize = btrfs_inode_size(node, ii);
5401         nbytes = btrfs_inode_nbytes(node, ii);
5402         mode = btrfs_inode_mode(node, ii);
5403         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5404         nlink = btrfs_inode_nlink(node, ii);
5405         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5406
5407         while (1) {
5408                 ret = btrfs_next_item(root, path);
5409                 if (ret < 0) {
5410                         /* out will fill 'err' rusing current statistics */
5411                         goto out;
5412                 } else if (ret > 0) {
5413                         err |= LAST_ITEM;
5414                         goto out;
5415                 }
5416
5417                 node = path->nodes[0];
5418                 slot = path->slots[0];
5419                 btrfs_item_key_to_cpu(node, &key, slot);
5420                 if (key.objectid != inode_id)
5421                         goto out;
5422
5423                 switch (key.type) {
5424                 case BTRFS_INODE_REF_KEY:
5425                         ret = check_inode_ref(root, &key, path, namebuf,
5426                                               &name_len, &refs, mode);
5427                         err |= ret;
5428                         break;
5429                 case BTRFS_INODE_EXTREF_KEY:
5430                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5431                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5432                                         root->objectid, key.objectid,
5433                                         key.offset);
5434                         ret = check_inode_extref(root, &key, node, slot, &refs,
5435                                                  mode);
5436                         err |= ret;
5437                         break;
5438                 case BTRFS_DIR_ITEM_KEY:
5439                 case BTRFS_DIR_INDEX_KEY:
5440                         if (!dir) {
5441                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5442                                         root->objectid, inode_id,
5443                                         imode_to_type(mode), key.objectid,
5444                                         key.offset);
5445                         }
5446                         ret = check_dir_item(root, &key, path, &size, ext_ref);
5447                         err |= ret;
5448                         break;
5449                 case BTRFS_EXTENT_DATA_KEY:
5450                         if (dir) {
5451                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5452                                         root->objectid, inode_id, key.objectid,
5453                                         key.offset);
5454                         }
5455                         ret = check_file_extent(root, &key, node, slot,
5456                                                 nodatasum, &extent_size,
5457                                                 &extent_end);
5458                         err |= ret;
5459                         break;
5460                 case BTRFS_XATTR_ITEM_KEY:
5461                         break;
5462                 default:
5463                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5464                               key.objectid, key.type, key.offset);
5465                 }
5466         }
5467
5468 out:
5469         /* verify INODE_ITEM nlink/isize/nbytes */
5470         if (dir) {
5471                 if (nlink != 1) {
5472                         err |= LINK_COUNT_ERROR;
5473                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5474                               root->objectid, inode_id, nlink);
5475                 }
5476
5477                 /*
5478                  * Just a warning, as dir inode nbytes is just an
5479                  * instructive value.
5480                  */
5481                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5482                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5483                                 root->objectid, inode_id,
5484                                 root->fs_info->nodesize);
5485                 }
5486
5487                 if (isize != size) {
5488                         if (repair)
5489                                 ret = repair_dir_isize_lowmem(root, path,
5490                                                               inode_id, size);
5491                         if (!repair || ret) {
5492                                 err |= ISIZE_ERROR;
5493                                 error(
5494                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5495                                       root->objectid, inode_id, isize, size);
5496                         }
5497                 }
5498         } else {
5499                 if (nlink != refs) {
5500                         err |= LINK_COUNT_ERROR;
5501                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5502                               root->objectid, inode_id, nlink, refs);
5503                 } else if (!nlink) {
5504                         if (repair)
5505                                 ret = repair_inode_orphan_item_lowmem(root,
5506                                                               path, inode_id);
5507                         if (!repair || ret) {
5508                                 err |= ORPHAN_ITEM;
5509                                 error("root %llu INODE[%llu] is orphan item",
5510                                       root->objectid, inode_id);
5511                         }
5512                 }
5513
5514                 if (!nbytes && !no_holes && extent_end < isize) {
5515                         err |= NBYTES_ERROR;
5516                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5517                               root->objectid, inode_id, isize);
5518                 }
5519
5520                 if (nbytes != extent_size) {
5521                         if (repair)
5522                                 ret = repair_inode_nbytes_lowmem(root, path,
5523                                                          inode_id, extent_size);
5524                         if (!repair || ret) {
5525                                 err |= NBYTES_ERROR;
5526                                 error(
5527         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5528                                       root->objectid, inode_id, nbytes,
5529                                       extent_size);
5530                         }
5531                 }
5532         }
5533
5534         return err;
5535 }
5536
5537 /*
5538  * Insert the missing inode item and inode ref.
5539  *
5540  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
5541  * Root dir should be handled specially because root dir is the root of fs.
5542  *
5543  * returns err (>0 or 0) after repair
5544  */
5545 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5546 {
5547         struct btrfs_trans_handle *trans;
5548         struct btrfs_key key;
5549         struct btrfs_path path;
5550         int filetype = BTRFS_FT_DIR;
5551         int ret = 0;
5552
5553         btrfs_init_path(&path);
5554
5555         if (err & INODE_REF_MISSING) {
5556                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5557                 key.type = BTRFS_INODE_REF_KEY;
5558                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5559
5560                 trans = btrfs_start_transaction(root, 1);
5561                 if (IS_ERR(trans)) {
5562                         ret = PTR_ERR(trans);
5563                         goto out;
5564                 }
5565
5566                 btrfs_release_path(&path);
5567                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5568                 if (ret)
5569                         goto trans_fail;
5570
5571                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5572                                              BTRFS_FIRST_FREE_OBJECTID,
5573                                              BTRFS_FIRST_FREE_OBJECTID, 0);
5574                 if (ret)
5575                         goto trans_fail;
5576
5577                 printf("Add INODE_REF[%llu %llu] name %s\n",
5578                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5579                        "..");
5580                 err &= ~INODE_REF_MISSING;
5581 trans_fail:
5582                 if (ret)
5583                         error("fail to insert first inode's ref");
5584                 btrfs_commit_transaction(trans, root);
5585         }
5586
5587         if (err & INODE_ITEM_MISSING) {
5588                 ret = repair_inode_item_missing(root,
5589                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
5590                 if (ret)
5591                         goto out;
5592                 err &= ~INODE_ITEM_MISSING;
5593         }
5594 out:
5595         if (ret)
5596                 error("fail to repair first inode");
5597         btrfs_release_path(&path);
5598         return err;
5599 }
5600
5601 /*
5602  * check first root dir's inode_item and inode_ref
5603  *
5604  * returns 0 means no error
5605  * returns >0 means error
5606  * returns <0 means fatal error
5607  */
5608 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5609 {
5610         struct btrfs_path path;
5611         struct btrfs_key key;
5612         struct btrfs_inode_item *ii;
5613         u64 index;
5614         u32 mode;
5615         int err = 0;
5616         int ret;
5617
5618         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5619         key.type = BTRFS_INODE_ITEM_KEY;
5620         key.offset = 0;
5621
5622         /* For root being dropped, we don't need to check first inode */
5623         if (btrfs_root_refs(&root->root_item) == 0 &&
5624             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5625             BTRFS_FIRST_FREE_OBJECTID)
5626                 return 0;
5627
5628         btrfs_init_path(&path);
5629         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5630         if (ret < 0)
5631                 goto out;
5632         if (ret > 0) {
5633                 ret = 0;
5634                 err |= INODE_ITEM_MISSING;
5635         } else {
5636                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5637                                     struct btrfs_inode_item);
5638                 mode = btrfs_inode_mode(path.nodes[0], ii);
5639                 if (imode_to_type(mode) != BTRFS_FT_DIR)
5640                         err |= INODE_ITEM_MISMATCH;
5641         }
5642
5643         /* lookup first inode ref */
5644         key.offset = BTRFS_FIRST_FREE_OBJECTID;
5645         key.type = BTRFS_INODE_REF_KEY;
5646         /* special index value */
5647         index = 0;
5648
5649         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5650         if (ret < 0)
5651                 goto out;
5652         err |= ret;
5653
5654 out:
5655         btrfs_release_path(&path);
5656
5657         if (err && repair)
5658                 err = repair_fs_first_inode(root, err);
5659
5660         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5661                 error("root dir INODE_ITEM is %s",
5662                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5663         if (err & INODE_REF_MISSING)
5664                 error("root dir INODE_REF is missing");
5665
5666         return ret < 0 ? ret : err;
5667 }
5668
5669 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5670                                                 u64 parent, u64 root)
5671 {
5672         struct rb_node *node;
5673         struct tree_backref *back = NULL;
5674         struct tree_backref match = {
5675                 .node = {
5676                         .is_data = 0,
5677                 },
5678         };
5679
5680         if (parent) {
5681                 match.parent = parent;
5682                 match.node.full_backref = 1;
5683         } else {
5684                 match.root = root;
5685         }
5686
5687         node = rb_search(&rec->backref_tree, &match.node.node,
5688                          (rb_compare_keys)compare_extent_backref, NULL);
5689         if (node)
5690                 back = to_tree_backref(rb_node_to_extent_backref(node));
5691
5692         return back;
5693 }
5694
5695 static struct data_backref *find_data_backref(struct extent_record *rec,
5696                                                 u64 parent, u64 root,
5697                                                 u64 owner, u64 offset,
5698                                                 int found_ref,
5699                                                 u64 disk_bytenr, u64 bytes)
5700 {
5701         struct rb_node *node;
5702         struct data_backref *back = NULL;
5703         struct data_backref match = {
5704                 .node = {
5705                         .is_data = 1,
5706                 },
5707                 .owner = owner,
5708                 .offset = offset,
5709                 .bytes = bytes,
5710                 .found_ref = found_ref,
5711                 .disk_bytenr = disk_bytenr,
5712         };
5713
5714         if (parent) {
5715                 match.parent = parent;
5716                 match.node.full_backref = 1;
5717         } else {
5718                 match.root = root;
5719         }
5720
5721         node = rb_search(&rec->backref_tree, &match.node.node,
5722                          (rb_compare_keys)compare_extent_backref, NULL);
5723         if (node)
5724                 back = to_data_backref(rb_node_to_extent_backref(node));
5725
5726         return back;
5727 }
5728 /*
5729  * Iterate all item on the tree and call check_inode_item() to check.
5730  *
5731  * @root:       the root of the tree to be checked.
5732  * @ext_ref:    the EXTENDED_IREF feature
5733  *
5734  * Return 0 if no error found.
5735  * Return <0 for error.
5736  */
5737 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5738 {
5739         struct btrfs_path path;
5740         struct node_refs nrefs;
5741         struct btrfs_root_item *root_item = &root->root_item;
5742         int ret;
5743         int level;
5744         int err = 0;
5745
5746         /*
5747          * We need to manually check the first inode item(256)
5748          * As the following traversal function will only start from
5749          * the first inode item in the leaf, if inode item(256) is missing
5750          * we will just skip it forever.
5751          */
5752         ret = check_fs_first_inode(root, ext_ref);
5753         if (ret < 0)
5754                 return ret;
5755         err |= !!ret;
5756
5757         memset(&nrefs, 0, sizeof(nrefs));
5758         level = btrfs_header_level(root->node);
5759         btrfs_init_path(&path);
5760
5761         if (btrfs_root_refs(root_item) > 0 ||
5762             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5763                 path.nodes[level] = root->node;
5764                 path.slots[level] = 0;
5765                 extent_buffer_get(root->node);
5766         } else {
5767                 struct btrfs_key key;
5768
5769                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5770                 level = root_item->drop_level;
5771                 path.lowest_level = level;
5772                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5773                 if (ret < 0)
5774                         goto out;
5775                 ret = 0;
5776         }
5777
5778         while (1) {
5779                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5780                 err |= !!ret;
5781
5782                 /* if ret is negative, walk shall stop */
5783                 if (ret < 0) {
5784                         ret = err;
5785                         break;
5786                 }
5787
5788                 ret = walk_up_tree_v2(root, &path, &level);
5789                 if (ret != 0) {
5790                         /* Normal exit, reset ret to err */
5791                         ret = err;
5792                         break;
5793                 }
5794         }
5795
5796 out:
5797         btrfs_release_path(&path);
5798         return ret;
5799 }
5800
5801 /*
5802  * Find the relative ref for root_ref and root_backref.
5803  *
5804  * @root:       the root of the root tree.
5805  * @ref_key:    the key of the root ref.
5806  *
5807  * Return 0 if no error occurred.
5808  */
5809 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5810                           struct extent_buffer *node, int slot)
5811 {
5812         struct btrfs_path path;
5813         struct btrfs_key key;
5814         struct btrfs_root_ref *ref;
5815         struct btrfs_root_ref *backref;
5816         char ref_name[BTRFS_NAME_LEN] = {0};
5817         char backref_name[BTRFS_NAME_LEN] = {0};
5818         u64 ref_dirid;
5819         u64 ref_seq;
5820         u32 ref_namelen;
5821         u64 backref_dirid;
5822         u64 backref_seq;
5823         u32 backref_namelen;
5824         u32 len;
5825         int ret;
5826         int err = 0;
5827
5828         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5829         ref_dirid = btrfs_root_ref_dirid(node, ref);
5830         ref_seq = btrfs_root_ref_sequence(node, ref);
5831         ref_namelen = btrfs_root_ref_name_len(node, ref);
5832
5833         if (ref_namelen <= BTRFS_NAME_LEN) {
5834                 len = ref_namelen;
5835         } else {
5836                 len = BTRFS_NAME_LEN;
5837                 warning("%s[%llu %llu] ref_name too long",
5838                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5839                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5840                         ref_key->offset);
5841         }
5842         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5843
5844         /* Find relative root_ref */
5845         key.objectid = ref_key->offset;
5846         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5847         key.offset = ref_key->objectid;
5848
5849         btrfs_init_path(&path);
5850         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5851         if (ret) {
5852                 err |= ROOT_REF_MISSING;
5853                 error("%s[%llu %llu] couldn't find relative ref",
5854                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5855                       "ROOT_REF" : "ROOT_BACKREF",
5856                       ref_key->objectid, ref_key->offset);
5857                 goto out;
5858         }
5859
5860         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5861                                  struct btrfs_root_ref);
5862         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5863         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5864         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5865
5866         if (backref_namelen <= BTRFS_NAME_LEN) {
5867                 len = backref_namelen;
5868         } else {
5869                 len = BTRFS_NAME_LEN;
5870                 warning("%s[%llu %llu] ref_name too long",
5871                         key.type == BTRFS_ROOT_REF_KEY ?
5872                         "ROOT_REF" : "ROOT_BACKREF",
5873                         key.objectid, key.offset);
5874         }
5875         read_extent_buffer(path.nodes[0], backref_name,
5876                            (unsigned long)(backref + 1), len);
5877
5878         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5879             ref_namelen != backref_namelen ||
5880             strncmp(ref_name, backref_name, len)) {
5881                 err |= ROOT_REF_MISMATCH;
5882                 error("%s[%llu %llu] mismatch relative ref",
5883                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5884                       "ROOT_REF" : "ROOT_BACKREF",
5885                       ref_key->objectid, ref_key->offset);
5886         }
5887 out:
5888         btrfs_release_path(&path);
5889         return err;
5890 }
5891
5892 /*
5893  * Check all fs/file tree in low_memory mode.
5894  *
5895  * 1. for fs tree root item, call check_fs_root_v2()
5896  * 2. for fs tree root ref/backref, call check_root_ref()
5897  *
5898  * Return 0 if no error occurred.
5899  */
5900 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5901 {
5902         struct btrfs_root *tree_root = fs_info->tree_root;
5903         struct btrfs_root *cur_root = NULL;
5904         struct btrfs_path path;
5905         struct btrfs_key key;
5906         struct extent_buffer *node;
5907         unsigned int ext_ref;
5908         int slot;
5909         int ret;
5910         int err = 0;
5911
5912         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5913
5914         btrfs_init_path(&path);
5915         key.objectid = BTRFS_FS_TREE_OBJECTID;
5916         key.offset = 0;
5917         key.type = BTRFS_ROOT_ITEM_KEY;
5918
5919         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5920         if (ret < 0) {
5921                 err = ret;
5922                 goto out;
5923         } else if (ret > 0) {
5924                 err = -ENOENT;
5925                 goto out;
5926         }
5927
5928         while (1) {
5929                 node = path.nodes[0];
5930                 slot = path.slots[0];
5931                 btrfs_item_key_to_cpu(node, &key, slot);
5932                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5933                         goto out;
5934                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5935                     fs_root_objectid(key.objectid)) {
5936                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5937                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5938                                                                        &key);
5939                         } else {
5940                                 key.offset = (u64)-1;
5941                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5942                         }
5943
5944                         if (IS_ERR(cur_root)) {
5945                                 error("Fail to read fs/subvol tree: %lld",
5946                                       key.objectid);
5947                                 err = -EIO;
5948                                 goto next;
5949                         }
5950
5951                         ret = check_fs_root_v2(cur_root, ext_ref);
5952                         err |= ret;
5953
5954                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5955                                 btrfs_free_fs_root(cur_root);
5956                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5957                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5958                         ret = check_root_ref(tree_root, &key, node, slot);
5959                         err |= ret;
5960                 }
5961 next:
5962                 ret = btrfs_next_item(tree_root, &path);
5963                 if (ret > 0)
5964                         goto out;
5965                 if (ret < 0) {
5966                         err = ret;
5967                         goto out;
5968                 }
5969         }
5970
5971 out:
5972         btrfs_release_path(&path);
5973         return err;
5974 }
5975
5976 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5977                           struct cache_tree *root_cache)
5978 {
5979         int ret;
5980
5981         if (!ctx.progress_enabled)
5982                 fprintf(stderr, "checking fs roots\n");
5983         if (check_mode == CHECK_MODE_LOWMEM)
5984                 ret = check_fs_roots_v2(fs_info);
5985         else
5986                 ret = check_fs_roots(fs_info, root_cache);
5987
5988         return ret;
5989 }
5990
5991 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5992 {
5993         struct extent_backref *back, *tmp;
5994         struct tree_backref *tback;
5995         struct data_backref *dback;
5996         u64 found = 0;
5997         int err = 0;
5998
5999         rbtree_postorder_for_each_entry_safe(back, tmp,
6000                                              &rec->backref_tree, node) {
6001                 if (!back->found_extent_tree) {
6002                         err = 1;
6003                         if (!print_errs)
6004                                 goto out;
6005                         if (back->is_data) {
6006                                 dback = to_data_backref(back);
6007                                 fprintf(stderr, "Data backref %llu %s %llu"
6008                                         " owner %llu offset %llu num_refs %lu"
6009                                         " not found in extent tree\n",
6010                                         (unsigned long long)rec->start,
6011                                         back->full_backref ?
6012                                         "parent" : "root",
6013                                         back->full_backref ?
6014                                         (unsigned long long)dback->parent:
6015                                         (unsigned long long)dback->root,
6016                                         (unsigned long long)dback->owner,
6017                                         (unsigned long long)dback->offset,
6018                                         (unsigned long)dback->num_refs);
6019                         } else {
6020                                 tback = to_tree_backref(back);
6021                                 fprintf(stderr, "Tree backref %llu parent %llu"
6022                                         " root %llu not found in extent tree\n",
6023                                         (unsigned long long)rec->start,
6024                                         (unsigned long long)tback->parent,
6025                                         (unsigned long long)tback->root);
6026                         }
6027                 }
6028                 if (!back->is_data && !back->found_ref) {
6029                         err = 1;
6030                         if (!print_errs)
6031                                 goto out;
6032                         tback = to_tree_backref(back);
6033                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6034                                 (unsigned long long)rec->start,
6035                                 back->full_backref ? "parent" : "root",
6036                                 back->full_backref ?
6037                                 (unsigned long long)tback->parent :
6038                                 (unsigned long long)tback->root, back);
6039                 }
6040                 if (back->is_data) {
6041                         dback = to_data_backref(back);
6042                         if (dback->found_ref != dback->num_refs) {
6043                                 err = 1;
6044                                 if (!print_errs)
6045                                         goto out;
6046                                 fprintf(stderr, "Incorrect local backref count"
6047                                         " on %llu %s %llu owner %llu"
6048                                         " offset %llu found %u wanted %u back %p\n",
6049                                         (unsigned long long)rec->start,
6050                                         back->full_backref ?
6051                                         "parent" : "root",
6052                                         back->full_backref ?
6053                                         (unsigned long long)dback->parent:
6054                                         (unsigned long long)dback->root,
6055                                         (unsigned long long)dback->owner,
6056                                         (unsigned long long)dback->offset,
6057                                         dback->found_ref, dback->num_refs, back);
6058                         }
6059                         if (dback->disk_bytenr != rec->start) {
6060                                 err = 1;
6061                                 if (!print_errs)
6062                                         goto out;
6063                                 fprintf(stderr, "Backref disk bytenr does not"
6064                                         " match extent record, bytenr=%llu, "
6065                                         "ref bytenr=%llu\n",
6066                                         (unsigned long long)rec->start,
6067                                         (unsigned long long)dback->disk_bytenr);
6068                         }
6069
6070                         if (dback->bytes != rec->nr) {
6071                                 err = 1;
6072                                 if (!print_errs)
6073                                         goto out;
6074                                 fprintf(stderr, "Backref bytes do not match "
6075                                         "extent backref, bytenr=%llu, ref "
6076                                         "bytes=%llu, backref bytes=%llu\n",
6077                                         (unsigned long long)rec->start,
6078                                         (unsigned long long)rec->nr,
6079                                         (unsigned long long)dback->bytes);
6080                         }
6081                 }
6082                 if (!back->is_data) {
6083                         found += 1;
6084                 } else {
6085                         dback = to_data_backref(back);
6086                         found += dback->found_ref;
6087                 }
6088         }
6089         if (found != rec->refs) {
6090                 err = 1;
6091                 if (!print_errs)
6092                         goto out;
6093                 fprintf(stderr, "Incorrect global backref count "
6094                         "on %llu found %llu wanted %llu\n",
6095                         (unsigned long long)rec->start,
6096                         (unsigned long long)found,
6097                         (unsigned long long)rec->refs);
6098         }
6099 out:
6100         return err;
6101 }
6102
/*
 * rb_free_nodes() callback: free the extent_backref that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
6109
6110 static void free_all_extent_backrefs(struct extent_record *rec)
6111 {
6112         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6113 }
6114
6115 static void free_extent_record_cache(struct cache_tree *extent_cache)
6116 {
6117         struct cache_extent *cache;
6118         struct extent_record *rec;
6119
6120         while (1) {
6121                 cache = first_cache_extent(extent_cache);
6122                 if (!cache)
6123                         break;
6124                 rec = container_of(cache, struct extent_record, cache);
6125                 remove_cache_extent(extent_cache, cache);
6126                 free_all_extent_backrefs(rec);
6127                 free(rec);
6128         }
6129 }
6130
6131 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6132                                  struct extent_record *rec)
6133 {
6134         if (rec->content_checked && rec->owner_ref_checked &&
6135             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6136             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6137             !rec->bad_full_backref && !rec->crossing_stripes &&
6138             !rec->wrong_chunk_type) {
6139                 remove_cache_extent(extent_cache, &rec->cache);
6140                 free_all_extent_backrefs(rec);
6141                 list_del_init(&rec->list);
6142                 free(rec);
6143         }
6144         return 0;
6145 }
6146
6147 static int check_owner_ref(struct btrfs_root *root,
6148                             struct extent_record *rec,
6149                             struct extent_buffer *buf)
6150 {
6151         struct extent_backref *node, *tmp;
6152         struct tree_backref *back;
6153         struct btrfs_root *ref_root;
6154         struct btrfs_key key;
6155         struct btrfs_path path;
6156         struct extent_buffer *parent;
6157         int level;
6158         int found = 0;
6159         int ret;
6160
6161         rbtree_postorder_for_each_entry_safe(node, tmp,
6162                                              &rec->backref_tree, node) {
6163                 if (node->is_data)
6164                         continue;
6165                 if (!node->found_ref)
6166                         continue;
6167                 if (node->full_backref)
6168                         continue;
6169                 back = to_tree_backref(node);
6170                 if (btrfs_header_owner(buf) == back->root)
6171                         return 0;
6172         }
6173         BUG_ON(rec->is_root);
6174
6175         /* try to find the block by search corresponding fs tree */
6176         key.objectid = btrfs_header_owner(buf);
6177         key.type = BTRFS_ROOT_ITEM_KEY;
6178         key.offset = (u64)-1;
6179
6180         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6181         if (IS_ERR(ref_root))
6182                 return 1;
6183
6184         level = btrfs_header_level(buf);
6185         if (level == 0)
6186                 btrfs_item_key_to_cpu(buf, &key, 0);
6187         else
6188                 btrfs_node_key_to_cpu(buf, &key, 0);
6189
6190         btrfs_init_path(&path);
6191         path.lowest_level = level + 1;
6192         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6193         if (ret < 0)
6194                 return 0;
6195
6196         parent = path.nodes[level + 1];
6197         if (parent && buf->start == btrfs_node_blockptr(parent,
6198                                                         path.slots[level + 1]))
6199                 found = 1;
6200
6201         btrfs_release_path(&path);
6202         return found ? 0 : 1;
6203 }
6204
6205 static int is_extent_tree_record(struct extent_record *rec)
6206 {
6207         struct extent_backref *node, *tmp;
6208         struct tree_backref *back;
6209         int is_extent = 0;
6210
6211         rbtree_postorder_for_each_entry_safe(node, tmp,
6212                                              &rec->backref_tree, node) {
6213                 if (node->is_data)
6214                         return 0;
6215                 back = to_tree_backref(node);
6216                 if (node->full_backref)
6217                         return 0;
6218                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6219                         is_extent = 1;
6220         }
6221         return is_extent;
6222 }
6223
6224
6225 static int record_bad_block_io(struct btrfs_fs_info *info,
6226                                struct cache_tree *extent_cache,
6227                                u64 start, u64 len)
6228 {
6229         struct extent_record *rec;
6230         struct cache_extent *cache;
6231         struct btrfs_key key;
6232
6233         cache = lookup_cache_extent(extent_cache, start, len);
6234         if (!cache)
6235                 return 0;
6236
6237         rec = container_of(cache, struct extent_record, cache);
6238         if (!is_extent_tree_record(rec))
6239                 return 0;
6240
6241         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6242         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6243 }
6244
6245 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6246                        struct extent_buffer *buf, int slot)
6247 {
6248         if (btrfs_header_level(buf)) {
6249                 struct btrfs_key_ptr ptr1, ptr2;
6250
6251                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6252                                    sizeof(struct btrfs_key_ptr));
6253                 read_extent_buffer(buf, &ptr2,
6254                                    btrfs_node_key_ptr_offset(slot + 1),
6255                                    sizeof(struct btrfs_key_ptr));
6256                 write_extent_buffer(buf, &ptr1,
6257                                     btrfs_node_key_ptr_offset(slot + 1),
6258                                     sizeof(struct btrfs_key_ptr));
6259                 write_extent_buffer(buf, &ptr2,
6260                                     btrfs_node_key_ptr_offset(slot),
6261                                     sizeof(struct btrfs_key_ptr));
6262                 if (slot == 0) {
6263                         struct btrfs_disk_key key;
6264                         btrfs_node_key(buf, &key, 0);
6265                         btrfs_fixup_low_keys(root, path, &key,
6266                                              btrfs_header_level(buf) + 1);
6267                 }
6268         } else {
6269                 struct btrfs_item *item1, *item2;
6270                 struct btrfs_key k1, k2;
6271                 char *item1_data, *item2_data;
6272                 u32 item1_offset, item2_offset, item1_size, item2_size;
6273
6274                 item1 = btrfs_item_nr(slot);
6275                 item2 = btrfs_item_nr(slot + 1);
6276                 btrfs_item_key_to_cpu(buf, &k1, slot);
6277                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6278                 item1_offset = btrfs_item_offset(buf, item1);
6279                 item2_offset = btrfs_item_offset(buf, item2);
6280                 item1_size = btrfs_item_size(buf, item1);
6281                 item2_size = btrfs_item_size(buf, item2);
6282
6283                 item1_data = malloc(item1_size);
6284                 if (!item1_data)
6285                         return -ENOMEM;
6286                 item2_data = malloc(item2_size);
6287                 if (!item2_data) {
6288                         free(item1_data);
6289                         return -ENOMEM;
6290                 }
6291
6292                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6293                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6294
6295                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6296                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6297                 free(item1_data);
6298                 free(item2_data);
6299
6300                 btrfs_set_item_offset(buf, item1, item2_offset);
6301                 btrfs_set_item_offset(buf, item2, item1_offset);
6302                 btrfs_set_item_size(buf, item1, item2_size);
6303                 btrfs_set_item_size(buf, item2, item1_size);
6304
6305                 path->slots[0] = slot;
6306                 btrfs_set_item_key_unsafe(root, path, &k2);
6307                 path->slots[0] = slot + 1;
6308                 btrfs_set_item_key_unsafe(root, path, &k1);
6309         }
6310         return 0;
6311 }
6312
6313 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6314 {
6315         struct extent_buffer *buf;
6316         struct btrfs_key k1, k2;
6317         int i;
6318         int level = path->lowest_level;
6319         int ret = -EIO;
6320
6321         buf = path->nodes[level];
6322         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6323                 if (level) {
6324                         btrfs_node_key_to_cpu(buf, &k1, i);
6325                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6326                 } else {
6327                         btrfs_item_key_to_cpu(buf, &k1, i);
6328                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6329                 }
6330                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6331                         continue;
6332                 ret = swap_values(root, path, buf, i);
6333                 if (ret)
6334                         break;
6335                 btrfs_mark_buffer_dirty(buf);
6336                 i = 0;
6337         }
6338         return ret;
6339 }
6340
6341 static int delete_bogus_item(struct btrfs_root *root,
6342                              struct btrfs_path *path,
6343                              struct extent_buffer *buf, int slot)
6344 {
6345         struct btrfs_key key;
6346         int nritems = btrfs_header_nritems(buf);
6347
6348         btrfs_item_key_to_cpu(buf, &key, slot);
6349
6350         /* These are all the keys we can deal with missing. */
6351         if (key.type != BTRFS_DIR_INDEX_KEY &&
6352             key.type != BTRFS_EXTENT_ITEM_KEY &&
6353             key.type != BTRFS_METADATA_ITEM_KEY &&
6354             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6355             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6356                 return -1;
6357
6358         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6359                (unsigned long long)key.objectid, key.type,
6360                (unsigned long long)key.offset, slot, buf->start);
6361         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6362                               btrfs_item_nr_offset(slot + 1),
6363                               sizeof(struct btrfs_item) *
6364                               (nritems - slot - 1));
6365         btrfs_set_header_nritems(buf, nritems - 1);
6366         if (slot == 0) {
6367                 struct btrfs_disk_key disk_key;
6368
6369                 btrfs_item_key(buf, &disk_key, 0);
6370                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6371         }
6372         btrfs_mark_buffer_dirty(buf);
6373         return 0;
6374 }
6375
6376 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6377 {
6378         struct extent_buffer *buf;
6379         int i;
6380         int ret = 0;
6381
6382         /* We should only get this for leaves */
6383         BUG_ON(path->lowest_level);
6384         buf = path->nodes[0];
6385 again:
6386         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6387                 unsigned int shift = 0, offset;
6388
6389                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6390                     BTRFS_LEAF_DATA_SIZE(root)) {
6391                         if (btrfs_item_end_nr(buf, i) >
6392                             BTRFS_LEAF_DATA_SIZE(root)) {
6393                                 ret = delete_bogus_item(root, path, buf, i);
6394                                 if (!ret)
6395                                         goto again;
6396                                 fprintf(stderr, "item is off the end of the "
6397                                         "leaf, can't fix\n");
6398                                 ret = -EIO;
6399                                 break;
6400                         }
6401                         shift = BTRFS_LEAF_DATA_SIZE(root) -
6402                                 btrfs_item_end_nr(buf, i);
6403                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6404                            btrfs_item_offset_nr(buf, i - 1)) {
6405                         if (btrfs_item_end_nr(buf, i) >
6406                             btrfs_item_offset_nr(buf, i - 1)) {
6407                                 ret = delete_bogus_item(root, path, buf, i);
6408                                 if (!ret)
6409                                         goto again;
6410                                 fprintf(stderr, "items overlap, can't fix\n");
6411                                 ret = -EIO;
6412                                 break;
6413                         }
6414                         shift = btrfs_item_offset_nr(buf, i - 1) -
6415                                 btrfs_item_end_nr(buf, i);
6416                 }
6417                 if (!shift)
6418                         continue;
6419
6420                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6421                        i, shift, (unsigned long long)buf->start);
6422                 offset = btrfs_item_offset_nr(buf, i);
6423                 memmove_extent_buffer(buf,
6424                                       btrfs_leaf_data(buf) + offset + shift,
6425                                       btrfs_leaf_data(buf) + offset,
6426                                       btrfs_item_size_nr(buf, i));
6427                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6428                                       offset + shift);
6429                 btrfs_mark_buffer_dirty(buf);
6430         }
6431
6432         /*
6433          * We may have moved things, in which case we want to exit so we don't
6434          * write those changes out.  Once we have proper abort functionality in
6435          * progs this can be changed to something nicer.
6436          */
6437         BUG_ON(ret);
6438         return ret;
6439 }
6440
6441 /*
6442  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6443  * then just return -EIO.
6444  */
6445 static int try_to_fix_bad_block(struct btrfs_root *root,
6446                                 struct extent_buffer *buf,
6447                                 enum btrfs_tree_block_status status)
6448 {
6449         struct btrfs_trans_handle *trans;
6450         struct ulist *roots;
6451         struct ulist_node *node;
6452         struct btrfs_root *search_root;
6453         struct btrfs_path path;
6454         struct ulist_iterator iter;
6455         struct btrfs_key root_key, key;
6456         int ret;
6457
6458         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6459             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6460                 return -EIO;
6461
6462         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6463         if (ret)
6464                 return -EIO;
6465
6466         btrfs_init_path(&path);
6467         ULIST_ITER_INIT(&iter);
6468         while ((node = ulist_next(roots, &iter))) {
6469                 root_key.objectid = node->val;
6470                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6471                 root_key.offset = (u64)-1;
6472
6473                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6474                 if (IS_ERR(root)) {
6475                         ret = -EIO;
6476                         break;
6477                 }
6478
6479
6480                 trans = btrfs_start_transaction(search_root, 0);
6481                 if (IS_ERR(trans)) {
6482                         ret = PTR_ERR(trans);
6483                         break;
6484                 }
6485
6486                 path.lowest_level = btrfs_header_level(buf);
6487                 path.skip_check_block = 1;
6488                 if (path.lowest_level)
6489                         btrfs_node_key_to_cpu(buf, &key, 0);
6490                 else
6491                         btrfs_item_key_to_cpu(buf, &key, 0);
6492                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6493                 if (ret) {
6494                         ret = -EIO;
6495                         btrfs_commit_transaction(trans, search_root);
6496                         break;
6497                 }
6498                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6499                         ret = fix_key_order(search_root, &path);
6500                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6501                         ret = fix_item_offset(search_root, &path);
6502                 if (ret) {
6503                         btrfs_commit_transaction(trans, search_root);
6504                         break;
6505                 }
6506                 btrfs_release_path(&path);
6507                 btrfs_commit_transaction(trans, search_root);
6508         }
6509         ulist_free(roots);
6510         btrfs_release_path(&path);
6511         return ret;
6512 }
6513
6514 static int check_block(struct btrfs_root *root,
6515                        struct cache_tree *extent_cache,
6516                        struct extent_buffer *buf, u64 flags)
6517 {
6518         struct extent_record *rec;
6519         struct cache_extent *cache;
6520         struct btrfs_key key;
6521         enum btrfs_tree_block_status status;
6522         int ret = 0;
6523         int level;
6524
6525         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6526         if (!cache)
6527                 return 1;
6528         rec = container_of(cache, struct extent_record, cache);
6529         rec->generation = btrfs_header_generation(buf);
6530
6531         level = btrfs_header_level(buf);
6532         if (btrfs_header_nritems(buf) > 0) {
6533
6534                 if (level == 0)
6535                         btrfs_item_key_to_cpu(buf, &key, 0);
6536                 else
6537                         btrfs_node_key_to_cpu(buf, &key, 0);
6538
6539                 rec->info_objectid = key.objectid;
6540         }
6541         rec->info_level = level;
6542
6543         if (btrfs_is_leaf(buf))
6544                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6545         else
6546                 status = btrfs_check_node(root, &rec->parent_key, buf);
6547
6548         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6549                 if (repair)
6550                         status = try_to_fix_bad_block(root, buf, status);
6551                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6552                         ret = -EIO;
6553                         fprintf(stderr, "bad block %llu\n",
6554                                 (unsigned long long)buf->start);
6555                 } else {
6556                         /*
6557                          * Signal to callers we need to start the scan over
6558                          * again since we'll have cowed blocks.
6559                          */
6560                         ret = -EAGAIN;
6561                 }
6562         } else {
6563                 rec->content_checked = 1;
6564                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6565                         rec->owner_ref_checked = 1;
6566                 else {
6567                         ret = check_owner_ref(root, rec, buf);
6568                         if (!ret)
6569                                 rec->owner_ref_checked = 1;
6570                 }
6571         }
6572         if (!ret)
6573                 maybe_free_extent_rec(extent_cache, rec);
6574         return ret;
6575 }
6576
#if 0
/*
 * Compiled out.  List based lookup of a tree backref on @rec: with a
 * non-zero @parent it matches full backrefs by parent, otherwise it matches
 * non-full backrefs by @root.  Returns the match or NULL.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;
		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
6606
6607 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6608                                                 u64 parent, u64 root)
6609 {
6610         struct tree_backref *ref = malloc(sizeof(*ref));
6611
6612         if (!ref)
6613                 return NULL;
6614         memset(&ref->node, 0, sizeof(ref->node));
6615         if (parent > 0) {
6616                 ref->parent = parent;
6617                 ref->node.full_backref = 1;
6618         } else {
6619                 ref->root = root;
6620                 ref->node.full_backref = 0;
6621         }
6622
6623         return ref;
6624 }
6625
#if 0
/*
 * Compiled out.  List based lookup of a data backref on @rec: with a
 * non-zero @parent it matches full backrefs by parent, otherwise it matches
 * non-full backrefs by (@root, @owner, @offset).  When both the caller and
 * the candidate have found_ref set, the candidate must also agree on
 * @bytes and @disk_bytenr.  Returns the match or NULL.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;
		back = to_data_backref(node);

		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		     back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
6664
6665 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6666                                                 u64 parent, u64 root,
6667                                                 u64 owner, u64 offset,
6668                                                 u64 max_size)
6669 {
6670         struct data_backref *ref = malloc(sizeof(*ref));
6671
6672         if (!ref)
6673                 return NULL;
6674         memset(&ref->node, 0, sizeof(ref->node));
6675         ref->node.is_data = 1;
6676
6677         if (parent > 0) {
6678                 ref->parent = parent;
6679                 ref->owner = 0;
6680                 ref->offset = 0;
6681                 ref->node.full_backref = 1;
6682         } else {
6683                 ref->root = root;
6684                 ref->owner = owner;
6685                 ref->offset = offset;
6686                 ref->node.full_backref = 0;
6687         }
6688         ref->bytes = max_size;
6689         ref->found_ref = 0;
6690         ref->num_refs = 0;
6691         if (max_size > rec->max_size)
6692                 rec->max_size = max_size;
6693         return ref;
6694 }
6695
6696 /* Check if the type of extent matches with its chunk */
6697 static void check_extent_type(struct extent_record *rec)
6698 {
6699         struct btrfs_block_group_cache *bg_cache;
6700
6701         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6702         if (!bg_cache)
6703                 return;
6704
6705         /* data extent, check chunk directly*/
6706         if (!rec->metadata) {
6707                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6708                         rec->wrong_chunk_type = 1;
6709                 return;
6710         }
6711
6712         /* metadata extent, check the obvious case first */
6713         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6714                                  BTRFS_BLOCK_GROUP_METADATA))) {
6715                 rec->wrong_chunk_type = 1;
6716                 return;
6717         }
6718
6719         /*
6720          * Check SYSTEM extent, as it's also marked as metadata, we can only
6721          * make sure it's a SYSTEM extent by its backref
6722          */
6723         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6724                 struct extent_backref *node;
6725                 struct tree_backref *tback;
6726                 u64 bg_type;
6727
6728                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6729                 if (node->is_data) {
6730                         /* tree block shouldn't have data backref */
6731                         rec->wrong_chunk_type = 1;
6732                         return;
6733                 }
6734                 tback = container_of(node, struct tree_backref, node);
6735
6736                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6737                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6738                 else
6739                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6740                 if (!(bg_cache->flags & bg_type))
6741                         rec->wrong_chunk_type = 1;
6742         }
6743 }
6744
6745 /*
6746  * Allocate a new extent record, fill default values from @tmpl and insert int
6747  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6748  * the cache, otherwise it fails.
6749  */
6750 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6751                 struct extent_record *tmpl)
6752 {
6753         struct extent_record *rec;
6754         int ret = 0;
6755
6756         BUG_ON(tmpl->max_size == 0);
6757         rec = malloc(sizeof(*rec));
6758         if (!rec)
6759                 return -ENOMEM;
6760         rec->start = tmpl->start;
6761         rec->max_size = tmpl->max_size;
6762         rec->nr = max(tmpl->nr, tmpl->max_size);
6763         rec->found_rec = tmpl->found_rec;
6764         rec->content_checked = tmpl->content_checked;
6765         rec->owner_ref_checked = tmpl->owner_ref_checked;
6766         rec->num_duplicates = 0;
6767         rec->metadata = tmpl->metadata;
6768         rec->flag_block_full_backref = FLAG_UNSET;
6769         rec->bad_full_backref = 0;
6770         rec->crossing_stripes = 0;
6771         rec->wrong_chunk_type = 0;
6772         rec->is_root = tmpl->is_root;
6773         rec->refs = tmpl->refs;
6774         rec->extent_item_refs = tmpl->extent_item_refs;
6775         rec->parent_generation = tmpl->parent_generation;
6776         INIT_LIST_HEAD(&rec->backrefs);
6777         INIT_LIST_HEAD(&rec->dups);
6778         INIT_LIST_HEAD(&rec->list);
6779         rec->backref_tree = RB_ROOT;
6780         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6781         rec->cache.start = tmpl->start;
6782         rec->cache.size = tmpl->nr;
6783         ret = insert_cache_extent(extent_cache, &rec->cache);
6784         if (ret) {
6785                 free(rec);
6786                 return ret;
6787         }
6788         bytes_used += rec->nr;
6789
6790         if (tmpl->metadata)
6791                 rec->crossing_stripes = check_crossing_stripes(global_info,
6792                                 rec->start, global_info->nodesize);
6793         check_extent_type(rec);
6794         return ret;
6795 }
6796
6797 /*
6798  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6799  * some are hints:
6800  * - refs              - if found, increase refs
6801  * - is_root           - if found, set
6802  * - content_checked   - if found, set
6803  * - owner_ref_checked - if found, set
6804  *
6805  * If not found, create a new one, initialize and insert.
6806  */
6807 static int add_extent_rec(struct cache_tree *extent_cache,
6808                 struct extent_record *tmpl)
6809 {
6810         struct extent_record *rec;
6811         struct cache_extent *cache;
6812         int ret = 0;
6813         int dup = 0;
6814
6815         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6816         if (cache) {
6817                 rec = container_of(cache, struct extent_record, cache);
6818                 if (tmpl->refs)
6819                         rec->refs++;
6820                 if (rec->nr == 1)
6821                         rec->nr = max(tmpl->nr, tmpl->max_size);
6822
6823                 /*
6824                  * We need to make sure to reset nr to whatever the extent
6825                  * record says was the real size, this way we can compare it to
6826                  * the backrefs.
6827                  */
6828                 if (tmpl->found_rec) {
6829                         if (tmpl->start != rec->start || rec->found_rec) {
6830                                 struct extent_record *tmp;
6831
6832                                 dup = 1;
6833                                 if (list_empty(&rec->list))
6834                                         list_add_tail(&rec->list,
6835                                                       &duplicate_extents);
6836
6837                                 /*
6838                                  * We have to do this song and dance in case we
6839                                  * find an extent record that falls inside of
6840                                  * our current extent record but does not have
6841                                  * the same objectid.
6842                                  */
6843                                 tmp = malloc(sizeof(*tmp));
6844                                 if (!tmp)
6845                                         return -ENOMEM;
6846                                 tmp->start = tmpl->start;
6847                                 tmp->max_size = tmpl->max_size;
6848                                 tmp->nr = tmpl->nr;
6849                                 tmp->found_rec = 1;
6850                                 tmp->metadata = tmpl->metadata;
6851                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6852                                 INIT_LIST_HEAD(&tmp->list);
6853                                 list_add_tail(&tmp->list, &rec->dups);
6854                                 rec->num_duplicates++;
6855                         } else {
6856                                 rec->nr = tmpl->nr;
6857                                 rec->found_rec = 1;
6858                         }
6859                 }
6860
6861                 if (tmpl->extent_item_refs && !dup) {
6862                         if (rec->extent_item_refs) {
6863                                 fprintf(stderr, "block %llu rec "
6864                                         "extent_item_refs %llu, passed %llu\n",
6865                                         (unsigned long long)tmpl->start,
6866                                         (unsigned long long)
6867                                                         rec->extent_item_refs,
6868                                         (unsigned long long)tmpl->extent_item_refs);
6869                         }
6870                         rec->extent_item_refs = tmpl->extent_item_refs;
6871                 }
6872                 if (tmpl->is_root)
6873                         rec->is_root = 1;
6874                 if (tmpl->content_checked)
6875                         rec->content_checked = 1;
6876                 if (tmpl->owner_ref_checked)
6877                         rec->owner_ref_checked = 1;
6878                 memcpy(&rec->parent_key, &tmpl->parent_key,
6879                                 sizeof(tmpl->parent_key));
6880                 if (tmpl->parent_generation)
6881                         rec->parent_generation = tmpl->parent_generation;
6882                 if (rec->max_size < tmpl->max_size)
6883                         rec->max_size = tmpl->max_size;
6884
6885                 /*
6886                  * A metadata extent can't cross stripe_len boundary, otherwise
6887                  * kernel scrub won't be able to handle it.
6888                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6889                  * it.
6890                  */
6891                 if (tmpl->metadata)
6892                         rec->crossing_stripes = check_crossing_stripes(
6893                                         global_info, rec->start,
6894                                         global_info->nodesize);
6895                 check_extent_type(rec);
6896                 maybe_free_extent_rec(extent_cache, rec);
6897                 return ret;
6898         }
6899
6900         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6901
6902         return ret;
6903 }
6904
6905 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6906                             u64 parent, u64 root, int found_ref)
6907 {
6908         struct extent_record *rec;
6909         struct tree_backref *back;
6910         struct cache_extent *cache;
6911         int ret;
6912         bool insert = false;
6913
6914         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6915         if (!cache) {
6916                 struct extent_record tmpl;
6917
6918                 memset(&tmpl, 0, sizeof(tmpl));
6919                 tmpl.start = bytenr;
6920                 tmpl.nr = 1;
6921                 tmpl.metadata = 1;
6922                 tmpl.max_size = 1;
6923
6924                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6925                 if (ret)
6926                         return ret;
6927
6928                 /* really a bug in cache_extent implement now */
6929                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6930                 if (!cache)
6931                         return -ENOENT;
6932         }
6933
6934         rec = container_of(cache, struct extent_record, cache);
6935         if (rec->start != bytenr) {
6936                 /*
6937                  * Several cause, from unaligned bytenr to over lapping extents
6938                  */
6939                 return -EEXIST;
6940         }
6941
6942         back = find_tree_backref(rec, parent, root);
6943         if (!back) {
6944                 back = alloc_tree_backref(rec, parent, root);
6945                 if (!back)
6946                         return -ENOMEM;
6947                 insert = true;
6948         }
6949
6950         if (found_ref) {
6951                 if (back->node.found_ref) {
6952                         fprintf(stderr, "Extent back ref already exists "
6953                                 "for %llu parent %llu root %llu \n",
6954                                 (unsigned long long)bytenr,
6955                                 (unsigned long long)parent,
6956                                 (unsigned long long)root);
6957                 }
6958                 back->node.found_ref = 1;
6959         } else {
6960                 if (back->node.found_extent_tree) {
6961                         fprintf(stderr, "Extent back ref already exists "
6962                                 "for %llu parent %llu root %llu \n",
6963                                 (unsigned long long)bytenr,
6964                                 (unsigned long long)parent,
6965                                 (unsigned long long)root);
6966                 }
6967                 back->node.found_extent_tree = 1;
6968         }
6969         if (insert)
6970                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6971                         compare_extent_backref));
6972         check_extent_type(rec);
6973         maybe_free_extent_rec(extent_cache, rec);
6974         return 0;
6975 }
6976
6977 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6978                             u64 parent, u64 root, u64 owner, u64 offset,
6979                             u32 num_refs, int found_ref, u64 max_size)
6980 {
6981         struct extent_record *rec;
6982         struct data_backref *back;
6983         struct cache_extent *cache;
6984         int ret;
6985         bool insert = false;
6986
6987         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6988         if (!cache) {
6989                 struct extent_record tmpl;
6990
6991                 memset(&tmpl, 0, sizeof(tmpl));
6992                 tmpl.start = bytenr;
6993                 tmpl.nr = 1;
6994                 tmpl.max_size = max_size;
6995
6996                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6997                 if (ret)
6998                         return ret;
6999
7000                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7001                 if (!cache)
7002                         abort();
7003         }
7004
7005         rec = container_of(cache, struct extent_record, cache);
7006         if (rec->max_size < max_size)
7007                 rec->max_size = max_size;
7008
7009         /*
7010          * If found_ref is set then max_size is the real size and must match the
7011          * existing refs.  So if we have already found a ref then we need to
7012          * make sure that this ref matches the existing one, otherwise we need
7013          * to add a new backref so we can notice that the backrefs don't match
7014          * and we need to figure out who is telling the truth.  This is to
7015          * account for that awful fsync bug I introduced where we'd end up with
7016          * a btrfs_file_extent_item that would have its length include multiple
7017          * prealloc extents or point inside of a prealloc extent.
7018          */
7019         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7020                                  bytenr, max_size);
7021         if (!back) {
7022                 back = alloc_data_backref(rec, parent, root, owner, offset,
7023                                           max_size);
7024                 BUG_ON(!back);
7025                 insert = true;
7026         }
7027
7028         if (found_ref) {
7029                 BUG_ON(num_refs != 1);
7030                 if (back->node.found_ref)
7031                         BUG_ON(back->bytes != max_size);
7032                 back->node.found_ref = 1;
7033                 back->found_ref += 1;
7034                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7035                         back->bytes = max_size;
7036                         back->disk_bytenr = bytenr;
7037
7038                         /* Need to reinsert if not already in the tree */
7039                         if (!insert) {
7040                                 rb_erase(&back->node.node, &rec->backref_tree);
7041                                 insert = true;
7042                         }
7043                 }
7044                 rec->refs += 1;
7045                 rec->content_checked = 1;
7046                 rec->owner_ref_checked = 1;
7047         } else {
7048                 if (back->node.found_extent_tree) {
7049                         fprintf(stderr, "Extent back ref already exists "
7050                                 "for %llu parent %llu root %llu "
7051                                 "owner %llu offset %llu num_refs %lu\n",
7052                                 (unsigned long long)bytenr,
7053                                 (unsigned long long)parent,
7054                                 (unsigned long long)root,
7055                                 (unsigned long long)owner,
7056                                 (unsigned long long)offset,
7057                                 (unsigned long)num_refs);
7058                 }
7059                 back->num_refs = num_refs;
7060                 back->node.found_extent_tree = 1;
7061         }
7062         if (insert)
7063                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7064                         compare_extent_backref));
7065
7066         maybe_free_extent_rec(extent_cache, rec);
7067         return 0;
7068 }
7069
7070 static int add_pending(struct cache_tree *pending,
7071                        struct cache_tree *seen, u64 bytenr, u32 size)
7072 {
7073         int ret;
7074         ret = add_cache_extent(seen, bytenr, size);
7075         if (ret)
7076                 return ret;
7077         add_cache_extent(pending, bytenr, size);
7078         return 0;
7079 }
7080
7081 static int pick_next_pending(struct cache_tree *pending,
7082                         struct cache_tree *reada,
7083                         struct cache_tree *nodes,
7084                         u64 last, struct block_info *bits, int bits_nr,
7085                         int *reada_bits)
7086 {
7087         unsigned long node_start = last;
7088         struct cache_extent *cache;
7089         int ret;
7090
7091         cache = search_cache_extent(reada, 0);
7092         if (cache) {
7093                 bits[0].start = cache->start;
7094                 bits[0].size = cache->size;
7095                 *reada_bits = 1;
7096                 return 1;
7097         }
7098         *reada_bits = 0;
7099         if (node_start > 32768)
7100                 node_start -= 32768;
7101
7102         cache = search_cache_extent(nodes, node_start);
7103         if (!cache)
7104                 cache = search_cache_extent(nodes, 0);
7105
7106         if (!cache) {
7107                  cache = search_cache_extent(pending, 0);
7108                  if (!cache)
7109                          return 0;
7110                  ret = 0;
7111                  do {
7112                          bits[ret].start = cache->start;
7113                          bits[ret].size = cache->size;
7114                          cache = next_cache_extent(cache);
7115                          ret++;
7116                  } while (cache && ret < bits_nr);
7117                  return ret;
7118         }
7119
7120         ret = 0;
7121         do {
7122                 bits[ret].start = cache->start;
7123                 bits[ret].size = cache->size;
7124                 cache = next_cache_extent(cache);
7125                 ret++;
7126         } while (cache && ret < bits_nr);
7127
7128         if (bits_nr - ret > 8) {
7129                 u64 lookup = bits[0].start + bits[0].size;
7130                 struct cache_extent *next;
7131                 next = search_cache_extent(pending, lookup);
7132                 while(next) {
7133                         if (next->start - lookup > 32768)
7134                                 break;
7135                         bits[ret].start = next->start;
7136                         bits[ret].size = next->size;
7137                         lookup = next->start + next->size;
7138                         ret++;
7139                         if (ret == bits_nr)
7140                                 break;
7141                         next = next_cache_extent(next);
7142                         if (!next)
7143                                 break;
7144                 }
7145         }
7146         return ret;
7147 }
7148
7149 static void free_chunk_record(struct cache_extent *cache)
7150 {
7151         struct chunk_record *rec;
7152
7153         rec = container_of(cache, struct chunk_record, cache);
7154         list_del_init(&rec->list);
7155         list_del_init(&rec->dextents);
7156         free(rec);
7157 }
7158
7159 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7160 {
7161         cache_tree_free_extents(chunk_cache, free_chunk_record);
7162 }
7163
7164 static void free_device_record(struct rb_node *node)
7165 {
7166         struct device_record *rec;
7167
7168         rec = container_of(node, struct device_record, node);
7169         free(rec);
7170 }
7171
7172 FREE_RB_BASED_TREE(device_cache, free_device_record);
7173
7174 int insert_block_group_record(struct block_group_tree *tree,
7175                               struct block_group_record *bg_rec)
7176 {
7177         int ret;
7178
7179         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7180         if (ret)
7181                 return ret;
7182
7183         list_add_tail(&bg_rec->list, &tree->block_groups);
7184         return 0;
7185 }
7186
7187 static void free_block_group_record(struct cache_extent *cache)
7188 {
7189         struct block_group_record *rec;
7190
7191         rec = container_of(cache, struct block_group_record, cache);
7192         list_del_init(&rec->list);
7193         free(rec);
7194 }
7195
7196 void free_block_group_tree(struct block_group_tree *tree)
7197 {
7198         cache_tree_free_extents(&tree->tree, free_block_group_record);
7199 }
7200
7201 int insert_device_extent_record(struct device_extent_tree *tree,
7202                                 struct device_extent_record *de_rec)
7203 {
7204         int ret;
7205
7206         /*
7207          * Device extent is a bit different from the other extents, because
7208          * the extents which belong to the different devices may have the
7209          * same start and size, so we need use the special extent cache
7210          * search/insert functions.
7211          */
7212         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7213         if (ret)
7214                 return ret;
7215
7216         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7217         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7218         return 0;
7219 }
7220
7221 static void free_device_extent_record(struct cache_extent *cache)
7222 {
7223         struct device_extent_record *rec;
7224
7225         rec = container_of(cache, struct device_extent_record, cache);
7226         if (!list_empty(&rec->chunk_list))
7227                 list_del_init(&rec->chunk_list);
7228         if (!list_empty(&rec->device_list))
7229                 list_del_init(&rec->device_list);
7230         free(rec);
7231 }
7232
7233 void free_device_extent_tree(struct device_extent_tree *tree)
7234 {
7235         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7236 }
7237
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * The item key supplies the extent bytenr (objectid) and the parent
 * (offset).  Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are
 * recorded as tree backrefs, all others as data backrefs carrying the ref
 * count stored in the item.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
7258
7259 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7260                                             struct btrfs_key *key,
7261                                             int slot)
7262 {
7263         struct btrfs_chunk *ptr;
7264         struct chunk_record *rec;
7265         int num_stripes, i;
7266
7267         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7268         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7269
7270         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7271         if (!rec) {
7272                 fprintf(stderr, "memory allocation failed\n");
7273                 exit(-1);
7274         }
7275
7276         INIT_LIST_HEAD(&rec->list);
7277         INIT_LIST_HEAD(&rec->dextents);
7278         rec->bg_rec = NULL;
7279
7280         rec->cache.start = key->offset;
7281         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7282
7283         rec->generation = btrfs_header_generation(leaf);
7284
7285         rec->objectid = key->objectid;
7286         rec->type = key->type;
7287         rec->offset = key->offset;
7288
7289         rec->length = rec->cache.size;
7290         rec->owner = btrfs_chunk_owner(leaf, ptr);
7291         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7292         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7293         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7294         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7295         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7296         rec->num_stripes = num_stripes;
7297         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7298
7299         for (i = 0; i < rec->num_stripes; ++i) {
7300                 rec->stripes[i].devid =
7301                         btrfs_stripe_devid_nr(leaf, ptr, i);
7302                 rec->stripes[i].offset =
7303                         btrfs_stripe_offset_nr(leaf, ptr, i);
7304                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7305                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7306                                 BTRFS_UUID_SIZE);
7307         }
7308
7309         return rec;
7310 }
7311
7312 static int process_chunk_item(struct cache_tree *chunk_cache,
7313                               struct btrfs_key *key, struct extent_buffer *eb,
7314                               int slot)
7315 {
7316         struct chunk_record *rec;
7317         struct btrfs_chunk *chunk;
7318         int ret = 0;
7319
7320         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7321         /*
7322          * Do extra check for this chunk item,
7323          *
7324          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7325          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7326          * and owner<->key_type check.
7327          */
7328         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7329                                       key->offset);
7330         if (ret < 0) {
7331                 error("chunk(%llu, %llu) is not valid, ignore it",
7332                       key->offset, btrfs_chunk_length(eb, chunk));
7333                 return 0;
7334         }
7335         rec = btrfs_new_chunk_record(eb, key, slot);
7336         ret = insert_cache_extent(chunk_cache, &rec->cache);
7337         if (ret) {
7338                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7339                         rec->offset, rec->length);
7340                 free(rec);
7341         }
7342
7343         return ret;
7344 }
7345
7346 static int process_device_item(struct rb_root *dev_cache,
7347                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7348 {
7349         struct btrfs_dev_item *ptr;
7350         struct device_record *rec;
7351         int ret = 0;
7352
7353         ptr = btrfs_item_ptr(eb,
7354                 slot, struct btrfs_dev_item);
7355
7356         rec = malloc(sizeof(*rec));
7357         if (!rec) {
7358                 fprintf(stderr, "memory allocation failed\n");
7359                 return -ENOMEM;
7360         }
7361
7362         rec->devid = key->offset;
7363         rec->generation = btrfs_header_generation(eb);
7364
7365         rec->objectid = key->objectid;
7366         rec->type = key->type;
7367         rec->offset = key->offset;
7368
7369         rec->devid = btrfs_device_id(eb, ptr);
7370         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7371         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7372
7373         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7374         if (ret) {
7375                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7376                 free(rec);
7377         }
7378
7379         return ret;
7380 }
7381
7382 struct block_group_record *
7383 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7384                              int slot)
7385 {
7386         struct btrfs_block_group_item *ptr;
7387         struct block_group_record *rec;
7388
7389         rec = calloc(1, sizeof(*rec));
7390         if (!rec) {
7391                 fprintf(stderr, "memory allocation failed\n");
7392                 exit(-1);
7393         }
7394
7395         rec->cache.start = key->objectid;
7396         rec->cache.size = key->offset;
7397
7398         rec->generation = btrfs_header_generation(leaf);
7399
7400         rec->objectid = key->objectid;
7401         rec->type = key->type;
7402         rec->offset = key->offset;
7403
7404         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7405         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7406
7407         INIT_LIST_HEAD(&rec->list);
7408
7409         return rec;
7410 }
7411
7412 static int process_block_group_item(struct block_group_tree *block_group_cache,
7413                                     struct btrfs_key *key,
7414                                     struct extent_buffer *eb, int slot)
7415 {
7416         struct block_group_record *rec;
7417         int ret = 0;
7418
7419         rec = btrfs_new_block_group_record(eb, key, slot);
7420         ret = insert_block_group_record(block_group_cache, rec);
7421         if (ret) {
7422                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7423                         rec->objectid, rec->offset);
7424                 free(rec);
7425         }
7426
7427         return ret;
7428 }
7429
7430 struct device_extent_record *
7431 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7432                                struct btrfs_key *key, int slot)
7433 {
7434         struct device_extent_record *rec;
7435         struct btrfs_dev_extent *ptr;
7436
7437         rec = calloc(1, sizeof(*rec));
7438         if (!rec) {
7439                 fprintf(stderr, "memory allocation failed\n");
7440                 exit(-1);
7441         }
7442
7443         rec->cache.objectid = key->objectid;
7444         rec->cache.start = key->offset;
7445
7446         rec->generation = btrfs_header_generation(leaf);
7447
7448         rec->objectid = key->objectid;
7449         rec->type = key->type;
7450         rec->offset = key->offset;
7451
7452         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7453         rec->chunk_objecteid =
7454                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7455         rec->chunk_offset =
7456                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7457         rec->length = btrfs_dev_extent_length(leaf, ptr);
7458         rec->cache.size = rec->length;
7459
7460         INIT_LIST_HEAD(&rec->chunk_list);
7461         INIT_LIST_HEAD(&rec->device_list);
7462
7463         return rec;
7464 }
7465
7466 static int
7467 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7468                            struct btrfs_key *key, struct extent_buffer *eb,
7469                            int slot)
7470 {
7471         struct device_extent_record *rec;
7472         int ret;
7473
7474         rec = btrfs_new_device_extent_record(eb, key, slot);
7475         ret = insert_device_extent_record(dev_extent_cache, rec);
7476         if (ret) {
7477                 fprintf(stderr,
7478                         "Device extent[%llu, %llu, %llu] existed.\n",
7479                         rec->objectid, rec->offset, rec->length);
7480                 free(rec);
7481         }
7482
7483         return ret;
7484 }
7485
/*
 * Record the extent item at @slot of @eb, together with all of its inline
 * backrefs, in @extent_cache.
 *
 * The extent length is the nodesize for METADATA_ITEM keys and the key
 * offset otherwise.  Items smaller than struct btrfs_extent_item are handled
 * as v0 extent items when BTRFS_COMPAT_EXTENT_TREE_V0 is defined and hit
 * BUG() when it is not.
 *
 * Returns -EIO when the bytenr is not sector aligned, when a tree block's
 * length differs from the nodesize or when a data extent's length is not
 * sector aligned.  The v0 path returns the result of add_extent_rec().
 * Otherwise 0 is returned, even when adding the extent record or one of the
 * inline backrefs failed: such failures are at most logged.
 */
static int process_extent_item(struct btrfs_root *root,
                               struct cache_tree *extent_cache,
                               struct extent_buffer *eb, int slot)
{
        struct btrfs_extent_item *ei;
        struct btrfs_extent_inline_ref *iref;
        struct btrfs_extent_data_ref *dref;
        struct btrfs_shared_data_ref *sref;
        struct btrfs_key key;
        struct extent_record tmpl;
        unsigned long end;
        unsigned long ptr;
        int ret;
        int type;
        u32 item_size = btrfs_item_size_nr(eb, slot);
        u64 refs = 0;
        u64 offset;
        u64 num_bytes;
        int metadata = 0;

        btrfs_item_key_to_cpu(eb, &key, slot);

        /* METADATA_ITEM keys do not carry a length, use the nodesize */
        if (key.type == BTRFS_METADATA_ITEM_KEY) {
                metadata = 1;
                num_bytes = root->fs_info->nodesize;
        } else {
                num_bytes = key.offset;
        }

        if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
                error("ignoring invalid extent, bytenr %llu is not aligned to %u",
                      key.objectid, root->fs_info->sectorsize);
                return -EIO;
        }
        /* Too small for a current extent item: v0 format or unsupported */
        if (item_size < sizeof(*ei)) {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                struct btrfs_extent_item_v0 *ei0;
                BUG_ON(item_size != sizeof(*ei0));
                ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
                refs = btrfs_extent_refs_v0(eb, ei0);
#else
                BUG();
#endif
                memset(&tmpl, 0, sizeof(tmpl));
                tmpl.start = key.objectid;
                tmpl.nr = num_bytes;
                tmpl.extent_item_refs = refs;
                tmpl.metadata = metadata;
                tmpl.found_rec = 1;
                tmpl.max_size = num_bytes;

                return add_extent_rec(extent_cache, &tmpl);
        }

        ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
        refs = btrfs_extent_refs(eb, ei);
        /* From here on the extent flags, not the key type, decide metadata */
        if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
                metadata = 1;
        else
                metadata = 0;
        if (metadata && num_bytes != root->fs_info->nodesize) {
                error("ignore invalid metadata extent, length %llu does not equal to %u",
                      num_bytes, root->fs_info->nodesize);
                return -EIO;
        }
        if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
                error("ignore invalid data extent, length %llu is not aligned to %u",
                      num_bytes, root->fs_info->sectorsize);
                return -EIO;
        }

        memset(&tmpl, 0, sizeof(tmpl));
        tmpl.start = key.objectid;
        tmpl.nr = num_bytes;
        tmpl.extent_item_refs = refs;
        tmpl.metadata = metadata;
        tmpl.found_rec = 1;
        tmpl.max_size = num_bytes;
        /* NOTE(review): the return value of add_extent_rec() is ignored here */
        add_extent_rec(extent_cache, &tmpl);

        /*
         * Inline refs start right after the extent item; tree blocks keyed
         * by EXTENT_ITEM additionally carry a btrfs_tree_block_info first.
         */
        ptr = (unsigned long)(ei + 1);
        if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
            key.type == BTRFS_EXTENT_ITEM_KEY)
                ptr += sizeof(struct btrfs_tree_block_info);

        end = (unsigned long)ei + item_size;
        /* Walk the inline refs until the end of the item is reached */
        while (ptr < end) {
                iref = (struct btrfs_extent_inline_ref *)ptr;
                type = btrfs_extent_inline_ref_type(eb, iref);
                offset = btrfs_extent_inline_ref_offset(eb, iref);
                switch (type) {
                case BTRFS_TREE_BLOCK_REF_KEY:
                        /* The inline offset is passed as the root */
                        ret = add_tree_backref(extent_cache, key.objectid,
                                        0, offset, 0);
                        if (ret < 0)
                                error(
                        "add_tree_backref failed (extent items tree block): %s",
                                      strerror(-ret));
                        break;
                case BTRFS_SHARED_BLOCK_REF_KEY:
                        /* The inline offset is passed as the parent */
                        ret = add_tree_backref(extent_cache, key.objectid,
                                        offset, 0, 0);
                        if (ret < 0)
                                error(
                        "add_tree_backref failed (extent items shared block): %s",
                                      strerror(-ret));
                        break;
                case BTRFS_EXTENT_DATA_REF_KEY:
                        /* The data ref is read from the offset field's position */
                        dref = (struct btrfs_extent_data_ref *)(&iref->offset);
                        add_data_backref(extent_cache, key.objectid, 0,
                                        btrfs_extent_data_ref_root(eb, dref),
                                        btrfs_extent_data_ref_objectid(eb,
                                                                       dref),
                                        btrfs_extent_data_ref_offset(eb, dref),
                                        btrfs_extent_data_ref_count(eb, dref),
                                        0, num_bytes);
                        break;
                case BTRFS_SHARED_DATA_REF_KEY:
                        /* The shared data ref follows the inline ref header */
                        sref = (struct btrfs_shared_data_ref *)(iref + 1);
                        add_data_backref(extent_cache, key.objectid, offset,
                                        0, 0, 0,
                                        btrfs_shared_data_ref_count(eb, sref),
                                        0, num_bytes);
                        break;
                default:
                        /* Unknown ref type: stop walking, still return 0 */
                        fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
                                key.objectid, key.type, num_bytes);
                        goto out;
                }
                ptr += btrfs_extent_inline_ref_size(type);
        }
        /* The refs are expected to end exactly at the end of the item */
        WARN_ON(ptr > end);
out:
        return 0;
}
7621
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * present in its free space ctl as exactly one entry, and consume that entry.
 *
 * For every super block mirror offset, btrfs_rmap_block() is asked for the
 * logical addresses inside the block group, and each reported
 * [logical, logical + stripe_len) region is carved out of the range first.
 * When such a region lies strictly inside the range, the part to its left is
 * checked recursively and the walk continues with the part to its right.
 *
 * Returns 0 when the (remaining) range is fully covered by such a region or
 * matches a free space entry exactly; the matching entry is unlinked and
 * freed.  Returns -EINVAL when no entry is found or its offset or size
 * differs, and the error from btrfs_rmap_block() or from the recursive call
 * otherwise.
 */
static int check_cache_range(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache,
                             u64 offset, u64 bytes)
{
        struct btrfs_free_space *entry;
        u64 *logical;
        u64 bytenr;
        int stripe_len;
        int i, nr, ret;

        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                bytenr = btrfs_sb_offset(i);
                ret = btrfs_rmap_block(root->fs_info,
                                       cache->key.objectid, bytenr, 0,
                                       &logical, &nr, &stripe_len);
                if (ret)
                        return ret;

                /* @logical is freed on every way out of this loop below */
                while (nr--) {
                        /* Skip regions that end before or start after the range */
                        if (logical[nr] + stripe_len <= offset)
                                continue;
                        if (offset + bytes <= logical[nr])
                                continue;
                        if (logical[nr] == offset) {
                                /* Region starts with the range: trim the front */
                                if (stripe_len >= bytes) {
                                        free(logical);
                                        return 0;
                                }
                                bytes -= stripe_len;
                                offset += stripe_len;
                        } else if (logical[nr] < offset) {
                                /* Region starts earlier: keep what lies past it */
                                if (logical[nr] + stripe_len >=
                                    offset + bytes) {
                                        free(logical);
                                        return 0;
                                }
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        } else {
                                /*
                                 * Could be tricky, the super may land in the
                                 * middle of the area we're checking.  First
                                 * check the easiest case, it's at the end.
                                 */
                                if (logical[nr] + stripe_len >=
                                    bytes + offset) {
                                        bytes = logical[nr] - offset;
                                        continue;
                                }

                                /* Check the left side */
                                ret = check_cache_range(root, cache,
                                                        offset,
                                                        logical[nr] - offset);
                                if (ret) {
                                        free(logical);
                                        return ret;
                                }

                                /* Now we continue with the right side */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        }
                }

                free(logical);
        }

        /* What is left of the range must match one free space entry exactly */
        entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
        if (!entry) {
                fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
                        offset, offset+bytes);
                return -EINVAL;
        }

        if (entry->offset != offset) {
                fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
                        entry->offset);
                return -EINVAL;
        }

        if (entry->bytes != bytes) {
                fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
                        bytes, entry->bytes, offset);
                return -EINVAL;
        }

        /* Consume the entry */
        unlink_free_space(cache->free_space_ctl, entry);
        free(entry);
        return 0;
}
7715
7716 static int verify_space_cache(struct btrfs_root *root,
7717                               struct btrfs_block_group_cache *cache)
7718 {
7719         struct btrfs_path path;
7720         struct extent_buffer *leaf;
7721         struct btrfs_key key;
7722         u64 last;
7723         int ret = 0;
7724
7725         root = root->fs_info->extent_root;
7726
7727         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7728
7729         btrfs_init_path(&path);
7730         key.objectid = last;
7731         key.offset = 0;
7732         key.type = BTRFS_EXTENT_ITEM_KEY;
7733         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7734         if (ret < 0)
7735                 goto out;
7736         ret = 0;
7737         while (1) {
7738                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7739                         ret = btrfs_next_leaf(root, &path);
7740                         if (ret < 0)
7741                                 goto out;
7742                         if (ret > 0) {
7743                                 ret = 0;
7744                                 break;
7745                         }
7746                 }
7747                 leaf = path.nodes[0];
7748                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7749                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7750                         break;
7751                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7752                     key.type != BTRFS_METADATA_ITEM_KEY) {
7753                         path.slots[0]++;
7754                         continue;
7755                 }
7756
7757                 if (last == key.objectid) {
7758                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7759                                 last = key.objectid + key.offset;
7760                         else
7761                                 last = key.objectid + root->fs_info->nodesize;
7762                         path.slots[0]++;
7763                         continue;
7764                 }
7765
7766                 ret = check_cache_range(root, cache, last,
7767                                         key.objectid - last);
7768                 if (ret)
7769                         break;
7770                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7771                         last = key.objectid + key.offset;
7772                 else
7773                         last = key.objectid + root->fs_info->nodesize;
7774                 path.slots[0]++;
7775         }
7776
7777         if (last < cache->key.objectid + cache->key.offset)
7778                 ret = check_cache_range(root, cache, last,
7779                                         cache->key.objectid +
7780                                         cache->key.offset - last);
7781
7782 out:
7783         btrfs_release_path(&path);
7784
7785         if (!ret &&
7786             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7787                 fprintf(stderr, "There are still entries left in the space "
7788                         "cache\n");
7789                 ret = -EINVAL;
7790         }
7791
7792         return ret;
7793 }
7794
7795 static int check_space_cache(struct btrfs_root *root)
7796 {
7797         struct btrfs_block_group_cache *cache;
7798         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7799         int ret;
7800         int error = 0;
7801
7802         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7803             btrfs_super_generation(root->fs_info->super_copy) !=
7804             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7805                 printf("cache and super generation don't match, space cache "
7806                        "will be invalidated\n");
7807                 return 0;
7808         }
7809
7810         if (ctx.progress_enabled) {
7811                 ctx.tp = TASK_FREE_SPACE;
7812                 task_start(ctx.info);
7813         }
7814
7815         while (1) {
7816                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7817                 if (!cache)
7818                         break;
7819
7820                 start = cache->key.objectid + cache->key.offset;
7821                 if (!cache->free_space_ctl) {
7822                         if (btrfs_init_free_space_ctl(cache,
7823                                                 root->fs_info->sectorsize)) {
7824                                 ret = -ENOMEM;
7825                                 break;
7826                         }
7827                 } else {
7828                         btrfs_remove_free_space_cache(cache);
7829                 }
7830
7831                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7832                         ret = exclude_super_stripes(root, cache);
7833                         if (ret) {
7834                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7835                                         strerror(-ret));
7836                                 error++;
7837                                 continue;
7838                         }
7839                         ret = load_free_space_tree(root->fs_info, cache);
7840                         free_excluded_extents(root, cache);
7841                         if (ret < 0) {
7842                                 fprintf(stderr, "could not load free space tree: %s\n",
7843                                         strerror(-ret));
7844                                 error++;
7845                                 continue;
7846                         }
7847                         error += ret;
7848                 } else {
7849                         ret = load_free_space_cache(root->fs_info, cache);
7850                         if (!ret)
7851                                 continue;
7852                 }
7853
7854                 ret = verify_space_cache(root, cache);
7855                 if (ret) {
7856                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7857                                 cache->key.objectid);
7858                         error++;
7859                 }
7860         }
7861
7862         task_stop(ctx.info);
7863
7864         return error ? -EINVAL : 0;
7865 }
7866
7867 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7868                         u64 num_bytes, unsigned long leaf_offset,
7869                         struct extent_buffer *eb) {
7870
7871         struct btrfs_fs_info *fs_info = root->fs_info;
7872         u64 offset = 0;
7873         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7874         char *data;
7875         unsigned long csum_offset;
7876         u32 csum;
7877         u32 csum_expected;
7878         u64 read_len;
7879         u64 data_checked = 0;
7880         u64 tmp;
7881         int ret = 0;
7882         int mirror;
7883         int num_copies;
7884
7885         if (num_bytes % fs_info->sectorsize)
7886                 return -EINVAL;
7887
7888         data = malloc(num_bytes);
7889         if (!data)
7890                 return -ENOMEM;
7891
7892         while (offset < num_bytes) {
7893                 mirror = 0;
7894 again:
7895                 read_len = num_bytes - offset;
7896                 /* read as much space once a time */
7897                 ret = read_extent_data(fs_info, data + offset,
7898                                 bytenr + offset, &read_len, mirror);
7899                 if (ret)
7900                         goto out;
7901                 data_checked = 0;
7902                 /* verify every 4k data's checksum */
7903                 while (data_checked < read_len) {
7904                         csum = ~(u32)0;
7905                         tmp = offset + data_checked;
7906
7907                         csum = btrfs_csum_data((char *)data + tmp,
7908                                                csum, fs_info->sectorsize);
7909                         btrfs_csum_final(csum, (u8 *)&csum);
7910
7911                         csum_offset = leaf_offset +
7912                                  tmp / fs_info->sectorsize * csum_size;
7913                         read_extent_buffer(eb, (char *)&csum_expected,
7914                                            csum_offset, csum_size);
7915                         /* try another mirror */
7916                         if (csum != csum_expected) {
7917                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7918                                                 mirror, bytenr + tmp,
7919                                                 csum, csum_expected);
7920                                 num_copies = btrfs_num_copies(root->fs_info,
7921                                                 bytenr, num_bytes);
7922                                 if (mirror < num_copies - 1) {
7923                                         mirror += 1;
7924                                         goto again;
7925                                 }
7926                         }
7927                         data_checked += fs_info->sectorsize;
7928                 }
7929                 offset += read_len;
7930         }
7931 out:
7932         free(data);
7933         return ret;
7934 }
7935
7936 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7937                                u64 num_bytes)
7938 {
7939         struct btrfs_path path;
7940         struct extent_buffer *leaf;
7941         struct btrfs_key key;
7942         int ret;
7943
7944         btrfs_init_path(&path);
7945         key.objectid = bytenr;
7946         key.type = BTRFS_EXTENT_ITEM_KEY;
7947         key.offset = (u64)-1;
7948
7949 again:
7950         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7951                                 0, 0);
7952         if (ret < 0) {
7953                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7954                 btrfs_release_path(&path);
7955                 return ret;
7956         } else if (ret) {
7957                 if (path.slots[0] > 0) {
7958                         path.slots[0]--;
7959                 } else {
7960                         ret = btrfs_prev_leaf(root, &path);
7961                         if (ret < 0) {
7962                                 goto out;
7963                         } else if (ret > 0) {
7964                                 ret = 0;
7965                                 goto out;
7966                         }
7967                 }
7968         }
7969
7970         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7971
7972         /*
7973          * Block group items come before extent items if they have the same
7974          * bytenr, so walk back one more just in case.  Dear future traveller,
7975          * first congrats on mastering time travel.  Now if it's not too much
7976          * trouble could you go back to 2006 and tell Chris to make the
7977          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7978          * EXTENT_ITEM_KEY please?
7979          */
7980         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7981                 if (path.slots[0] > 0) {
7982                         path.slots[0]--;
7983                 } else {
7984                         ret = btrfs_prev_leaf(root, &path);
7985                         if (ret < 0) {
7986                                 goto out;
7987                         } else if (ret > 0) {
7988                                 ret = 0;
7989                                 goto out;
7990                         }
7991                 }
7992                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7993         }
7994
7995         while (num_bytes) {
7996                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7997                         ret = btrfs_next_leaf(root, &path);
7998                         if (ret < 0) {
7999                                 fprintf(stderr, "Error going to next leaf "
8000                                         "%d\n", ret);
8001                                 btrfs_release_path(&path);
8002                                 return ret;
8003                         } else if (ret) {
8004                                 break;
8005                         }
8006                 }
8007                 leaf = path.nodes[0];
8008                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8009                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8010                         path.slots[0]++;
8011                         continue;
8012                 }
8013                 if (key.objectid + key.offset < bytenr) {
8014                         path.slots[0]++;
8015                         continue;
8016                 }
8017                 if (key.objectid > bytenr + num_bytes)
8018                         break;
8019
8020                 if (key.objectid == bytenr) {
8021                         if (key.offset >= num_bytes) {
8022                                 num_bytes = 0;
8023                                 break;
8024                         }
8025                         num_bytes -= key.offset;
8026                         bytenr += key.offset;
8027                 } else if (key.objectid < bytenr) {
8028                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8029                                 num_bytes = 0;
8030                                 break;
8031                         }
8032                         num_bytes = (bytenr + num_bytes) -
8033                                 (key.objectid + key.offset);
8034                         bytenr = key.objectid + key.offset;
8035                 } else {
8036                         if (key.objectid + key.offset < bytenr + num_bytes) {
8037                                 u64 new_start = key.objectid + key.offset;
8038                                 u64 new_bytes = bytenr + num_bytes - new_start;
8039
8040                                 /*
8041                                  * Weird case, the extent is in the middle of
8042                                  * our range, we'll have to search one side
8043                                  * and then the other.  Not sure if this happens
8044                                  * in real life, but no harm in coding it up
8045                                  * anyway just in case.
8046                                  */
8047                                 btrfs_release_path(&path);
8048                                 ret = check_extent_exists(root, new_start,
8049                                                           new_bytes);
8050                                 if (ret) {
8051                                         fprintf(stderr, "Right section didn't "
8052                                                 "have a record\n");
8053                                         break;
8054                                 }
8055                                 num_bytes = key.objectid - bytenr;
8056                                 goto again;
8057                         }
8058                         num_bytes = key.objectid - bytenr;
8059                 }
8060                 path.slots[0]++;
8061         }
8062         ret = 0;
8063
8064 out:
8065         if (num_bytes && !ret) {
8066                 fprintf(stderr, "There are no extents for csum range "
8067                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8068                 ret = 1;
8069         }
8070
8071         btrfs_release_path(&path);
8072         return ret;
8073 }
8074
8075 static int check_csums(struct btrfs_root *root)
8076 {
8077         struct btrfs_path path;
8078         struct extent_buffer *leaf;
8079         struct btrfs_key key;
8080         u64 offset = 0, num_bytes = 0;
8081         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8082         int errors = 0;
8083         int ret;
8084         u64 data_len;
8085         unsigned long leaf_offset;
8086
8087         root = root->fs_info->csum_root;
8088         if (!extent_buffer_uptodate(root->node)) {
8089                 fprintf(stderr, "No valid csum tree found\n");
8090                 return -ENOENT;
8091         }
8092
8093         btrfs_init_path(&path);
8094         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8095         key.type = BTRFS_EXTENT_CSUM_KEY;
8096         key.offset = 0;
8097         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8098         if (ret < 0) {
8099                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8100                 btrfs_release_path(&path);
8101                 return ret;
8102         }
8103
8104         if (ret > 0 && path.slots[0])
8105                 path.slots[0]--;
8106         ret = 0;
8107
8108         while (1) {
8109                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8110                         ret = btrfs_next_leaf(root, &path);
8111                         if (ret < 0) {
8112                                 fprintf(stderr, "Error going to next leaf "
8113                                         "%d\n", ret);
8114                                 break;
8115                         }
8116                         if (ret)
8117                                 break;
8118                 }
8119                 leaf = path.nodes[0];
8120
8121                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8122                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8123                         path.slots[0]++;
8124                         continue;
8125                 }
8126
8127                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8128                               csum_size) * root->fs_info->sectorsize;
8129                 if (!check_data_csum)
8130                         goto skip_csum_check;
8131                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8132                 ret = check_extent_csums(root, key.offset, data_len,
8133                                          leaf_offset, leaf);
8134                 if (ret)
8135                         break;
8136 skip_csum_check:
8137                 if (!num_bytes) {
8138                         offset = key.offset;
8139                 } else if (key.offset != offset + num_bytes) {
8140                         ret = check_extent_exists(root, offset, num_bytes);
8141                         if (ret) {
8142                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8143                                         "there is no extent record\n",
8144                                         offset, offset+num_bytes);
8145                                 errors++;
8146                         }
8147                         offset = key.offset;
8148                         num_bytes = 0;
8149                 }
8150                 num_bytes += data_len;
8151                 path.slots[0]++;
8152         }
8153
8154         btrfs_release_path(&path);
8155         return errors;
8156 }
8157
8158 static int is_dropped_key(struct btrfs_key *key,
8159                           struct btrfs_key *drop_key) {
8160         if (key->objectid < drop_key->objectid)
8161                 return 1;
8162         else if (key->objectid == drop_key->objectid) {
8163                 if (key->type < drop_key->type)
8164                         return 1;
8165                 else if (key->type == drop_key->type) {
8166                         if (key->offset < drop_key->offset)
8167                                 return 1;
8168                 }
8169         }
8170         return 0;
8171 }
8172
8173 /*
8174  * Here are the rules for FULL_BACKREF.
8175  *
8176  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8177  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8178  *      FULL_BACKREF set.
8179  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8180  *    if it happened after the relocation occurred since we'll have dropped the
8181  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8182  *    have no real way to know for sure.
8183  *
8184  * We process the blocks one root at a time, and we start from the lowest root
8185  * objectid and go to the highest.  So we can just lookup the owner backref for
8186  * the record and if we don't find it then we know it doesn't exist and we have
8187  * a FULL BACKREF.
8188  *
8189  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8190  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8191  * be set or not and then we can check later once we've gathered all the refs.
8192  */
8193 static int calc_extent_flag(struct cache_tree *extent_cache,
8194                            struct extent_buffer *buf,
8195                            struct root_item_record *ri,
8196                            u64 *flags)
8197 {
8198         struct extent_record *rec;
8199         struct cache_extent *cache;
8200         struct tree_backref *tback;
8201         u64 owner = 0;
8202
8203         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8204         /* we have added this extent before */
8205         if (!cache)
8206                 return -ENOENT;
8207
8208         rec = container_of(cache, struct extent_record, cache);
8209
8210         /*
8211          * Except file/reloc tree, we can not have
8212          * FULL BACKREF MODE
8213          */
8214         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8215                 goto normal;
8216         /*
8217          * root node
8218          */
8219         if (buf->start == ri->bytenr)
8220                 goto normal;
8221
8222         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8223                 goto full_backref;
8224
8225         owner = btrfs_header_owner(buf);
8226         if (owner == ri->objectid)
8227                 goto normal;
8228
8229         tback = find_tree_backref(rec, 0, owner);
8230         if (!tback)
8231                 goto full_backref;
8232 normal:
8233         *flags = 0;
8234         if (rec->flag_block_full_backref != FLAG_UNSET &&
8235             rec->flag_block_full_backref != 0)
8236                 rec->bad_full_backref = 1;
8237         return 0;
8238 full_backref:
8239         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8240         if (rec->flag_block_full_backref != FLAG_UNSET &&
8241             rec->flag_block_full_backref != 1)
8242                 rec->bad_full_backref = 1;
8243         return 0;
8244 }
8245
8246 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8247 {
8248         fprintf(stderr, "Invalid key type(");
8249         print_key_type(stderr, 0, key_type);
8250         fprintf(stderr, ") found in root(");
8251         print_objectid(stderr, rootid, 0);
8252         fprintf(stderr, ")\n");
8253 }
8254
8255 /*
8256  * Check if the key is valid with its extent buffer.
8257  *
8258  * This is a early check in case invalid key exists in a extent buffer
8259  * This is not comprehensive yet, but should prevent wrong key/item passed
8260  * further
8261  */
8262 static int check_type_with_root(u64 rootid, u8 key_type)
8263 {
8264         switch (key_type) {
8265         /* Only valid in chunk tree */
8266         case BTRFS_DEV_ITEM_KEY:
8267         case BTRFS_CHUNK_ITEM_KEY:
8268                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8269                         goto err;
8270                 break;
8271         /* valid in csum and log tree */
8272         case BTRFS_CSUM_TREE_OBJECTID:
8273                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8274                       is_fstree(rootid)))
8275                         goto err;
8276                 break;
8277         case BTRFS_EXTENT_ITEM_KEY:
8278         case BTRFS_METADATA_ITEM_KEY:
8279         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8280                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8281                         goto err;
8282                 break;
8283         case BTRFS_ROOT_ITEM_KEY:
8284                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8285                         goto err;
8286                 break;
8287         case BTRFS_DEV_EXTENT_KEY:
8288                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8289                         goto err;
8290                 break;
8291         }
8292         return 0;
8293 err:
8294         report_mismatch_key_root(key_type, rootid);
8295         return -EINVAL;
8296 }
8297
8298 static int run_next_block(struct btrfs_root *root,
8299                           struct block_info *bits,
8300                           int bits_nr,
8301                           u64 *last,
8302                           struct cache_tree *pending,
8303                           struct cache_tree *seen,
8304                           struct cache_tree *reada,
8305                           struct cache_tree *nodes,
8306                           struct cache_tree *extent_cache,
8307                           struct cache_tree *chunk_cache,
8308                           struct rb_root *dev_cache,
8309                           struct block_group_tree *block_group_cache,
8310                           struct device_extent_tree *dev_extent_cache,
8311                           struct root_item_record *ri)
8312 {
8313         struct btrfs_fs_info *fs_info = root->fs_info;
8314         struct extent_buffer *buf;
8315         struct extent_record *rec = NULL;
8316         u64 bytenr;
8317         u32 size;
8318         u64 parent;
8319         u64 owner;
8320         u64 flags;
8321         u64 ptr;
8322         u64 gen = 0;
8323         int ret = 0;
8324         int i;
8325         int nritems;
8326         struct btrfs_key key;
8327         struct cache_extent *cache;
8328         int reada_bits;
8329
8330         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8331                                     bits_nr, &reada_bits);
8332         if (nritems == 0)
8333                 return 1;
8334
8335         if (!reada_bits) {
8336                 for(i = 0; i < nritems; i++) {
8337                         ret = add_cache_extent(reada, bits[i].start,
8338                                                bits[i].size);
8339                         if (ret == -EEXIST)
8340                                 continue;
8341
8342                         /* fixme, get the parent transid */
8343                         readahead_tree_block(fs_info, bits[i].start, 0);
8344                 }
8345         }
8346         *last = bits[0].start;
8347         bytenr = bits[0].start;
8348         size = bits[0].size;
8349
8350         cache = lookup_cache_extent(pending, bytenr, size);
8351         if (cache) {
8352                 remove_cache_extent(pending, cache);
8353                 free(cache);
8354         }
8355         cache = lookup_cache_extent(reada, bytenr, size);
8356         if (cache) {
8357                 remove_cache_extent(reada, cache);
8358                 free(cache);
8359         }
8360         cache = lookup_cache_extent(nodes, bytenr, size);
8361         if (cache) {
8362                 remove_cache_extent(nodes, cache);
8363                 free(cache);
8364         }
8365         cache = lookup_cache_extent(extent_cache, bytenr, size);
8366         if (cache) {
8367                 rec = container_of(cache, struct extent_record, cache);
8368                 gen = rec->parent_generation;
8369         }
8370
8371         /* fixme, get the real parent transid */
8372         buf = read_tree_block(root->fs_info, bytenr, gen);
8373         if (!extent_buffer_uptodate(buf)) {
8374                 record_bad_block_io(root->fs_info,
8375                                     extent_cache, bytenr, size);
8376                 goto out;
8377         }
8378
8379         nritems = btrfs_header_nritems(buf);
8380
8381         flags = 0;
8382         if (!init_extent_tree) {
8383                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8384                                        btrfs_header_level(buf), 1, NULL,
8385                                        &flags);
8386                 if (ret < 0) {
8387                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8388                         if (ret < 0) {
8389                                 fprintf(stderr, "Couldn't calc extent flags\n");
8390                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8391                         }
8392                 }
8393         } else {
8394                 flags = 0;
8395                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8396                 if (ret < 0) {
8397                         fprintf(stderr, "Couldn't calc extent flags\n");
8398                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8399                 }
8400         }
8401
8402         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8403                 if (ri != NULL &&
8404                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8405                     ri->objectid == btrfs_header_owner(buf)) {
8406                         /*
8407                          * Ok we got to this block from it's original owner and
8408                          * we have FULL_BACKREF set.  Relocation can leave
8409                          * converted blocks over so this is altogether possible,
8410                          * however it's not possible if the generation > the
8411                          * last snapshot, so check for this case.
8412                          */
8413                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8414                             btrfs_header_generation(buf) > ri->last_snapshot) {
8415                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8416                                 rec->bad_full_backref = 1;
8417                         }
8418                 }
8419         } else {
8420                 if (ri != NULL &&
8421                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8422                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8423                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8424                         rec->bad_full_backref = 1;
8425                 }
8426         }
8427
8428         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8429                 rec->flag_block_full_backref = 1;
8430                 parent = bytenr;
8431                 owner = 0;
8432         } else {
8433                 rec->flag_block_full_backref = 0;
8434                 parent = 0;
8435                 owner = btrfs_header_owner(buf);
8436         }
8437
8438         ret = check_block(root, extent_cache, buf, flags);
8439         if (ret)
8440                 goto out;
8441
8442         if (btrfs_is_leaf(buf)) {
8443                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8444                 for (i = 0; i < nritems; i++) {
8445                         struct btrfs_file_extent_item *fi;
8446                         btrfs_item_key_to_cpu(buf, &key, i);
8447                         /*
8448                          * Check key type against the leaf owner.
8449                          * Could filter quite a lot of early error if
8450                          * owner is correct
8451                          */
8452                         if (check_type_with_root(btrfs_header_owner(buf),
8453                                                  key.type)) {
8454                                 fprintf(stderr, "ignoring invalid key\n");
8455                                 continue;
8456                         }
8457                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8458                                 process_extent_item(root, extent_cache, buf,
8459                                                     i);
8460                                 continue;
8461                         }
8462                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8463                                 process_extent_item(root, extent_cache, buf,
8464                                                     i);
8465                                 continue;
8466                         }
8467                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8468                                 total_csum_bytes +=
8469                                         btrfs_item_size_nr(buf, i);
8470                                 continue;
8471                         }
8472                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8473                                 process_chunk_item(chunk_cache, &key, buf, i);
8474                                 continue;
8475                         }
8476                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8477                                 process_device_item(dev_cache, &key, buf, i);
8478                                 continue;
8479                         }
8480                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8481                                 process_block_group_item(block_group_cache,
8482                                         &key, buf, i);
8483                                 continue;
8484                         }
8485                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8486                                 process_device_extent_item(dev_extent_cache,
8487                                         &key, buf, i);
8488                                 continue;
8489
8490                         }
8491                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8492 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8493                                 process_extent_ref_v0(extent_cache, buf, i);
8494 #else
8495                                 BUG();
8496 #endif
8497                                 continue;
8498                         }
8499
8500                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8501                                 ret = add_tree_backref(extent_cache,
8502                                                 key.objectid, 0, key.offset, 0);
8503                                 if (ret < 0)
8504                                         error(
8505                                 "add_tree_backref failed (leaf tree block): %s",
8506                                               strerror(-ret));
8507                                 continue;
8508                         }
8509                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8510                                 ret = add_tree_backref(extent_cache,
8511                                                 key.objectid, key.offset, 0, 0);
8512                                 if (ret < 0)
8513                                         error(
8514                                 "add_tree_backref failed (leaf shared block): %s",
8515                                               strerror(-ret));
8516                                 continue;
8517                         }
8518                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8519                                 struct btrfs_extent_data_ref *ref;
8520                                 ref = btrfs_item_ptr(buf, i,
8521                                                 struct btrfs_extent_data_ref);
8522                                 add_data_backref(extent_cache,
8523                                         key.objectid, 0,
8524                                         btrfs_extent_data_ref_root(buf, ref),
8525                                         btrfs_extent_data_ref_objectid(buf,
8526                                                                        ref),
8527                                         btrfs_extent_data_ref_offset(buf, ref),
8528                                         btrfs_extent_data_ref_count(buf, ref),
8529                                         0, root->fs_info->sectorsize);
8530                                 continue;
8531                         }
8532                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8533                                 struct btrfs_shared_data_ref *ref;
8534                                 ref = btrfs_item_ptr(buf, i,
8535                                                 struct btrfs_shared_data_ref);
8536                                 add_data_backref(extent_cache,
8537                                         key.objectid, key.offset, 0, 0, 0,
8538                                         btrfs_shared_data_ref_count(buf, ref),
8539                                         0, root->fs_info->sectorsize);
8540                                 continue;
8541                         }
8542                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8543                                 struct bad_item *bad;
8544
8545                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8546                                         continue;
8547                                 if (!owner)
8548                                         continue;
8549                                 bad = malloc(sizeof(struct bad_item));
8550                                 if (!bad)
8551                                         continue;
8552                                 INIT_LIST_HEAD(&bad->list);
8553                                 memcpy(&bad->key, &key,
8554                                        sizeof(struct btrfs_key));
8555                                 bad->root_id = owner;
8556                                 list_add_tail(&bad->list, &delete_items);
8557                                 continue;
8558                         }
8559                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8560                                 continue;
8561                         fi = btrfs_item_ptr(buf, i,
8562                                             struct btrfs_file_extent_item);
8563                         if (btrfs_file_extent_type(buf, fi) ==
8564                             BTRFS_FILE_EXTENT_INLINE)
8565                                 continue;
8566                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8567                                 continue;
8568
8569                         data_bytes_allocated +=
8570                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8571                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8572                                 abort();
8573                         }
8574                         data_bytes_referenced +=
8575                                 btrfs_file_extent_num_bytes(buf, fi);
8576                         add_data_backref(extent_cache,
8577                                 btrfs_file_extent_disk_bytenr(buf, fi),
8578                                 parent, owner, key.objectid, key.offset -
8579                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8580                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8581                 }
8582         } else {
8583                 int level;
8584                 struct btrfs_key first_key;
8585
8586                 first_key.objectid = 0;
8587
8588                 if (nritems > 0)
8589                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8590                 level = btrfs_header_level(buf);
8591                 for (i = 0; i < nritems; i++) {
8592                         struct extent_record tmpl;
8593
8594                         ptr = btrfs_node_blockptr(buf, i);
8595                         size = root->fs_info->nodesize;
8596                         btrfs_node_key_to_cpu(buf, &key, i);
8597                         if (ri != NULL) {
8598                                 if ((level == ri->drop_level)
8599                                     && is_dropped_key(&key, &ri->drop_key)) {
8600                                         continue;
8601                                 }
8602                         }
8603
8604                         memset(&tmpl, 0, sizeof(tmpl));
8605                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8606                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8607                         tmpl.start = ptr;
8608                         tmpl.nr = size;
8609                         tmpl.refs = 1;
8610                         tmpl.metadata = 1;
8611                         tmpl.max_size = size;
8612                         ret = add_extent_rec(extent_cache, &tmpl);
8613                         if (ret < 0)
8614                                 goto out;
8615
8616                         ret = add_tree_backref(extent_cache, ptr, parent,
8617                                         owner, 1);
8618                         if (ret < 0) {
8619                                 error(
8620                                 "add_tree_backref failed (non-leaf block): %s",
8621                                       strerror(-ret));
8622                                 continue;
8623                         }
8624
8625                         if (level > 1) {
8626                                 add_pending(nodes, seen, ptr, size);
8627                         } else {
8628                                 add_pending(pending, seen, ptr, size);
8629                         }
8630                 }
8631                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8632                                       nritems) * sizeof(struct btrfs_key_ptr);
8633         }
8634         total_btree_bytes += buf->len;
8635         if (fs_root_objectid(btrfs_header_owner(buf)))
8636                 total_fs_tree_bytes += buf->len;
8637         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8638                 total_extent_tree_bytes += buf->len;
8639 out:
8640         free_extent_buffer(buf);
8641         return ret;
8642 }
8643
8644 static int add_root_to_pending(struct extent_buffer *buf,
8645                                struct cache_tree *extent_cache,
8646                                struct cache_tree *pending,
8647                                struct cache_tree *seen,
8648                                struct cache_tree *nodes,
8649                                u64 objectid)
8650 {
8651         struct extent_record tmpl;
8652         int ret;
8653
8654         if (btrfs_header_level(buf) > 0)
8655                 add_pending(nodes, seen, buf->start, buf->len);
8656         else
8657                 add_pending(pending, seen, buf->start, buf->len);
8658
8659         memset(&tmpl, 0, sizeof(tmpl));
8660         tmpl.start = buf->start;
8661         tmpl.nr = buf->len;
8662         tmpl.is_root = 1;
8663         tmpl.refs = 1;
8664         tmpl.metadata = 1;
8665         tmpl.max_size = buf->len;
8666         add_extent_rec(extent_cache, &tmpl);
8667
8668         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8669             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8670                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8671                                 0, 1);
8672         else
8673                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8674                                 1);
8675         return ret;
8676 }
8677
8678 /* as we fix the tree, we might be deleting blocks that
8679  * we're tracking for repair.  This hook makes sure we
8680  * remove any backrefs for blocks as we are fixing them.
8681  */
8682 static int free_extent_hook(struct btrfs_trans_handle *trans,
8683                             struct btrfs_root *root,
8684                             u64 bytenr, u64 num_bytes, u64 parent,
8685                             u64 root_objectid, u64 owner, u64 offset,
8686                             int refs_to_drop)
8687 {
8688         struct extent_record *rec;
8689         struct cache_extent *cache;
8690         int is_data;
8691         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8692
8693         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8694         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8695         if (!cache)
8696                 return 0;
8697
8698         rec = container_of(cache, struct extent_record, cache);
8699         if (is_data) {
8700                 struct data_backref *back;
8701                 back = find_data_backref(rec, parent, root_objectid, owner,
8702                                          offset, 1, bytenr, num_bytes);
8703                 if (!back)
8704                         goto out;
8705                 if (back->node.found_ref) {
8706                         back->found_ref -= refs_to_drop;
8707                         if (rec->refs)
8708                                 rec->refs -= refs_to_drop;
8709                 }
8710                 if (back->node.found_extent_tree) {
8711                         back->num_refs -= refs_to_drop;
8712                         if (rec->extent_item_refs)
8713                                 rec->extent_item_refs -= refs_to_drop;
8714                 }
8715                 if (back->found_ref == 0)
8716                         back->node.found_ref = 0;
8717                 if (back->num_refs == 0)
8718                         back->node.found_extent_tree = 0;
8719
8720                 if (!back->node.found_extent_tree && back->node.found_ref) {
8721                         rb_erase(&back->node.node, &rec->backref_tree);
8722                         free(back);
8723                 }
8724         } else {
8725                 struct tree_backref *back;
8726                 back = find_tree_backref(rec, parent, root_objectid);
8727                 if (!back)
8728                         goto out;
8729                 if (back->node.found_ref) {
8730                         if (rec->refs)
8731                                 rec->refs--;
8732                         back->node.found_ref = 0;
8733                 }
8734                 if (back->node.found_extent_tree) {
8735                         if (rec->extent_item_refs)
8736                                 rec->extent_item_refs--;
8737                         back->node.found_extent_tree = 0;
8738                 }
8739                 if (!back->node.found_extent_tree && back->node.found_ref) {
8740                         rb_erase(&back->node.node, &rec->backref_tree);
8741                         free(back);
8742                 }
8743         }
8744         maybe_free_extent_rec(extent_cache, rec);
8745 out:
8746         return 0;
8747 }
8748
8749 static int delete_extent_records(struct btrfs_trans_handle *trans,
8750                                  struct btrfs_root *root,
8751                                  struct btrfs_path *path,
8752                                  u64 bytenr)
8753 {
8754         struct btrfs_key key;
8755         struct btrfs_key found_key;
8756         struct extent_buffer *leaf;
8757         int ret;
8758         int slot;
8759
8760
8761         key.objectid = bytenr;
8762         key.type = (u8)-1;
8763         key.offset = (u64)-1;
8764
8765         while(1) {
8766                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8767                                         &key, path, 0, 1);
8768                 if (ret < 0)
8769                         break;
8770
8771                 if (ret > 0) {
8772                         ret = 0;
8773                         if (path->slots[0] == 0)
8774                                 break;
8775                         path->slots[0]--;
8776                 }
8777                 ret = 0;
8778
8779                 leaf = path->nodes[0];
8780                 slot = path->slots[0];
8781
8782                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8783                 if (found_key.objectid != bytenr)
8784                         break;
8785
8786                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8787                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8788                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8789                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8790                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8791                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8792                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8793                         btrfs_release_path(path);
8794                         if (found_key.type == 0) {
8795                                 if (found_key.offset == 0)
8796                                         break;
8797                                 key.offset = found_key.offset - 1;
8798                                 key.type = found_key.type;
8799                         }
8800                         key.type = found_key.type - 1;
8801                         key.offset = (u64)-1;
8802                         continue;
8803                 }
8804
8805                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8806                         found_key.objectid, found_key.type, found_key.offset);
8807
8808                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8809                 if (ret)
8810                         break;
8811                 btrfs_release_path(path);
8812
8813                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8814                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8815                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8816                                 found_key.offset : root->fs_info->nodesize;
8817
8818                         ret = btrfs_update_block_group(trans, root, bytenr,
8819                                                        bytes, 0, 0);
8820                         if (ret)
8821                                 break;
8822                 }
8823         }
8824
8825         btrfs_release_path(path);
8826         return ret;
8827 }
8828
8829 /*
8830  * for a single backref, this will allocate a new extent
8831  * and add the backref to it.
8832  */
8833 static int record_extent(struct btrfs_trans_handle *trans,
8834                          struct btrfs_fs_info *info,
8835                          struct btrfs_path *path,
8836                          struct extent_record *rec,
8837                          struct extent_backref *back,
8838                          int allocated, u64 flags)
8839 {
8840         int ret = 0;
8841         struct btrfs_root *extent_root = info->extent_root;
8842         struct extent_buffer *leaf;
8843         struct btrfs_key ins_key;
8844         struct btrfs_extent_item *ei;
8845         struct data_backref *dback;
8846         struct btrfs_tree_block_info *bi;
8847
8848         if (!back->is_data)
8849                 rec->max_size = max_t(u64, rec->max_size,
8850                                     info->nodesize);
8851
8852         if (!allocated) {
8853                 u32 item_size = sizeof(*ei);
8854
8855                 if (!back->is_data)
8856                         item_size += sizeof(*bi);
8857
8858                 ins_key.objectid = rec->start;
8859                 ins_key.offset = rec->max_size;
8860                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8861
8862                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8863                                         &ins_key, item_size);
8864                 if (ret)
8865                         goto fail;
8866
8867                 leaf = path->nodes[0];
8868                 ei = btrfs_item_ptr(leaf, path->slots[0],
8869                                     struct btrfs_extent_item);
8870
8871                 btrfs_set_extent_refs(leaf, ei, 0);
8872                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8873
8874                 if (back->is_data) {
8875                         btrfs_set_extent_flags(leaf, ei,
8876                                                BTRFS_EXTENT_FLAG_DATA);
8877                 } else {
8878                         struct btrfs_disk_key copy_key;;
8879
8880                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8881                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8882                                              sizeof(*bi));
8883
8884                         btrfs_set_disk_key_objectid(&copy_key,
8885                                                     rec->info_objectid);
8886                         btrfs_set_disk_key_type(&copy_key, 0);
8887                         btrfs_set_disk_key_offset(&copy_key, 0);
8888
8889                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8890                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8891
8892                         btrfs_set_extent_flags(leaf, ei,
8893                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8894                 }
8895
8896                 btrfs_mark_buffer_dirty(leaf);
8897                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8898                                                rec->max_size, 1, 0);
8899                 if (ret)
8900                         goto fail;
8901                 btrfs_release_path(path);
8902         }
8903
8904         if (back->is_data) {
8905                 u64 parent;
8906                 int i;
8907
8908                 dback = to_data_backref(back);
8909                 if (back->full_backref)
8910                         parent = dback->parent;
8911                 else
8912                         parent = 0;
8913
8914                 for (i = 0; i < dback->found_ref; i++) {
8915                         /* if parent != 0, we're doing a full backref
8916                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8917                          * just makes the backref allocator create a data
8918                          * backref
8919                          */
8920                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8921                                                    rec->start, rec->max_size,
8922                                                    parent,
8923                                                    dback->root,
8924                                                    parent ?
8925                                                    BTRFS_FIRST_FREE_OBJECTID :
8926                                                    dback->owner,
8927                                                    dback->offset);
8928                         if (ret)
8929                                 break;
8930                 }
8931                 fprintf(stderr, "adding new data backref"
8932                                 " on %llu %s %llu owner %llu"
8933                                 " offset %llu found %d\n",
8934                                 (unsigned long long)rec->start,
8935                                 back->full_backref ?
8936                                 "parent" : "root",
8937                                 back->full_backref ?
8938                                 (unsigned long long)parent :
8939                                 (unsigned long long)dback->root,
8940                                 (unsigned long long)dback->owner,
8941                                 (unsigned long long)dback->offset,
8942                                 dback->found_ref);
8943         } else {
8944                 u64 parent;
8945                 struct tree_backref *tback;
8946
8947                 tback = to_tree_backref(back);
8948                 if (back->full_backref)
8949                         parent = tback->parent;
8950                 else
8951                         parent = 0;
8952
8953                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8954                                            rec->start, rec->max_size,
8955                                            parent, tback->root, 0, 0);
8956                 fprintf(stderr, "adding new tree backref on "
8957                         "start %llu len %llu parent %llu root %llu\n",
8958                         rec->start, rec->max_size, parent, tback->root);
8959         }
8960 fail:
8961         btrfs_release_path(path);
8962         return ret;
8963 }
8964
8965 static struct extent_entry *find_entry(struct list_head *entries,
8966                                        u64 bytenr, u64 bytes)
8967 {
8968         struct extent_entry *entry = NULL;
8969
8970         list_for_each_entry(entry, entries, list) {
8971                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8972                         return entry;
8973         }
8974
8975         return NULL;
8976 }
8977
8978 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8979 {
8980         struct extent_entry *entry, *best = NULL, *prev = NULL;
8981
8982         list_for_each_entry(entry, entries, list) {
8983                 /*
8984                  * If there are as many broken entries as entries then we know
8985                  * not to trust this particular entry.
8986                  */
8987                 if (entry->broken == entry->count)
8988                         continue;
8989
8990                 /*
8991                  * Special case, when there are only two entries and 'best' is
8992                  * the first one
8993                  */
8994                 if (!prev) {
8995                         best = entry;
8996                         prev = entry;
8997                         continue;
8998                 }
8999
9000                 /*
9001                  * If our current entry == best then we can't be sure our best
9002                  * is really the best, so we need to keep searching.
9003                  */
9004                 if (best && best->count == entry->count) {
9005                         prev = entry;
9006                         best = NULL;
9007                         continue;
9008                 }
9009
9010                 /* Prev == entry, not good enough, have to keep searching */
9011                 if (!prev->broken && prev->count == entry->count)
9012                         continue;
9013
9014                 if (!best)
9015                         best = (prev->count > entry->count) ? prev : entry;
9016                 else if (best->count < entry->count)
9017                         best = entry;
9018                 prev = entry;
9019         }
9020
9021         return best;
9022 }
9023
9024 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9025                       struct data_backref *dback, struct extent_entry *entry)
9026 {
9027         struct btrfs_trans_handle *trans;
9028         struct btrfs_root *root;
9029         struct btrfs_file_extent_item *fi;
9030         struct extent_buffer *leaf;
9031         struct btrfs_key key;
9032         u64 bytenr, bytes;
9033         int ret, err;
9034
9035         key.objectid = dback->root;
9036         key.type = BTRFS_ROOT_ITEM_KEY;
9037         key.offset = (u64)-1;
9038         root = btrfs_read_fs_root(info, &key);
9039         if (IS_ERR(root)) {
9040                 fprintf(stderr, "Couldn't find root for our ref\n");
9041                 return -EINVAL;
9042         }
9043
9044         /*
9045          * The backref points to the original offset of the extent if it was
9046          * split, so we need to search down to the offset we have and then walk
9047          * forward until we find the backref we're looking for.
9048          */
9049         key.objectid = dback->owner;
9050         key.type = BTRFS_EXTENT_DATA_KEY;
9051         key.offset = dback->offset;
9052         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9053         if (ret < 0) {
9054                 fprintf(stderr, "Error looking up ref %d\n", ret);
9055                 return ret;
9056         }
9057
9058         while (1) {
9059                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9060                         ret = btrfs_next_leaf(root, path);
9061                         if (ret) {
9062                                 fprintf(stderr, "Couldn't find our ref, next\n");
9063                                 return -EINVAL;
9064                         }
9065                 }
9066                 leaf = path->nodes[0];
9067                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9068                 if (key.objectid != dback->owner ||
9069                     key.type != BTRFS_EXTENT_DATA_KEY) {
9070                         fprintf(stderr, "Couldn't find our ref, search\n");
9071                         return -EINVAL;
9072                 }
9073                 fi = btrfs_item_ptr(leaf, path->slots[0],
9074                                     struct btrfs_file_extent_item);
9075                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9076                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9077
9078                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9079                         break;
9080                 path->slots[0]++;
9081         }
9082
9083         btrfs_release_path(path);
9084
9085         trans = btrfs_start_transaction(root, 1);
9086         if (IS_ERR(trans))
9087                 return PTR_ERR(trans);
9088
9089         /*
9090          * Ok we have the key of the file extent we want to fix, now we can cow
9091          * down to the thing and fix it.
9092          */
9093         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9094         if (ret < 0) {
9095                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9096                         key.objectid, key.type, key.offset, ret);
9097                 goto out;
9098         }
9099         if (ret > 0) {
9100                 fprintf(stderr, "Well that's odd, we just found this key "
9101                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9102                         key.offset);
9103                 ret = -EINVAL;
9104                 goto out;
9105         }
9106         leaf = path->nodes[0];
9107         fi = btrfs_item_ptr(leaf, path->slots[0],
9108                             struct btrfs_file_extent_item);
9109
9110         if (btrfs_file_extent_compression(leaf, fi) &&
9111             dback->disk_bytenr != entry->bytenr) {
9112                 fprintf(stderr, "Ref doesn't match the record start and is "
9113                         "compressed, please take a btrfs-image of this file "
9114                         "system and send it to a btrfs developer so they can "
9115                         "complete this functionality for bytenr %Lu\n",
9116                         dback->disk_bytenr);
9117                 ret = -EINVAL;
9118                 goto out;
9119         }
9120
9121         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9122                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9123         } else if (dback->disk_bytenr > entry->bytenr) {
9124                 u64 off_diff, offset;
9125
9126                 off_diff = dback->disk_bytenr - entry->bytenr;
9127                 offset = btrfs_file_extent_offset(leaf, fi);
9128                 if (dback->disk_bytenr + offset +
9129                     btrfs_file_extent_num_bytes(leaf, fi) >
9130                     entry->bytenr + entry->bytes) {
9131                         fprintf(stderr, "Ref is past the entry end, please "
9132                                 "take a btrfs-image of this file system and "
9133                                 "send it to a btrfs developer, ref %Lu\n",
9134                                 dback->disk_bytenr);
9135                         ret = -EINVAL;
9136                         goto out;
9137                 }
9138                 offset += off_diff;
9139                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9140                 btrfs_set_file_extent_offset(leaf, fi, offset);
9141         } else if (dback->disk_bytenr < entry->bytenr) {
9142                 u64 offset;
9143
9144                 offset = btrfs_file_extent_offset(leaf, fi);
9145                 if (dback->disk_bytenr + offset < entry->bytenr) {
9146                         fprintf(stderr, "Ref is before the entry start, please"
9147                                 " take a btrfs-image of this file system and "
9148                                 "send it to a btrfs developer, ref %Lu\n",
9149                                 dback->disk_bytenr);
9150                         ret = -EINVAL;
9151                         goto out;
9152                 }
9153
9154                 offset += dback->disk_bytenr;
9155                 offset -= entry->bytenr;
9156                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9157                 btrfs_set_file_extent_offset(leaf, fi, offset);
9158         }
9159
9160         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9161
9162         /*
9163          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9164          * only do this if we aren't using compression, otherwise it's a
9165          * trickier case.
9166          */
9167         if (!btrfs_file_extent_compression(leaf, fi))
9168                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9169         else
9170                 printf("ram bytes may be wrong?\n");
9171         btrfs_mark_buffer_dirty(leaf);
9172 out:
9173         err = btrfs_commit_transaction(trans, root);
9174         btrfs_release_path(path);
9175         return ret ? ret : err;
9176 }
9177
9178 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9179                            struct extent_record *rec)
9180 {
9181         struct extent_backref *back, *tmp;
9182         struct data_backref *dback;
9183         struct extent_entry *entry, *best = NULL;
9184         LIST_HEAD(entries);
9185         int nr_entries = 0;
9186         int broken_entries = 0;
9187         int ret = 0;
9188         short mismatch = 0;
9189
9190         /*
9191          * Metadata is easy and the backrefs should always agree on bytenr and
9192          * size, if not we've got bigger issues.
9193          */
9194         if (rec->metadata)
9195                 return 0;
9196
9197         rbtree_postorder_for_each_entry_safe(back, tmp,
9198                                              &rec->backref_tree, node) {
9199                 if (back->full_backref || !back->is_data)
9200                         continue;
9201
9202                 dback = to_data_backref(back);
9203
9204                 /*
9205                  * We only pay attention to backrefs that we found a real
9206                  * backref for.
9207                  */
9208                 if (dback->found_ref == 0)
9209                         continue;
9210
9211                 /*
9212                  * For now we only catch when the bytes don't match, not the
9213                  * bytenr.  We can easily do this at the same time, but I want
9214                  * to have a fs image to test on before we just add repair
9215                  * functionality willy-nilly so we know we won't screw up the
9216                  * repair.
9217                  */
9218
9219                 entry = find_entry(&entries, dback->disk_bytenr,
9220                                    dback->bytes);
9221                 if (!entry) {
9222                         entry = malloc(sizeof(struct extent_entry));
9223                         if (!entry) {
9224                                 ret = -ENOMEM;
9225                                 goto out;
9226                         }
9227                         memset(entry, 0, sizeof(*entry));
9228                         entry->bytenr = dback->disk_bytenr;
9229                         entry->bytes = dback->bytes;
9230                         list_add_tail(&entry->list, &entries);
9231                         nr_entries++;
9232                 }
9233
9234                 /*
9235                  * If we only have on entry we may think the entries agree when
9236                  * in reality they don't so we have to do some extra checking.
9237                  */
9238                 if (dback->disk_bytenr != rec->start ||
9239                     dback->bytes != rec->nr || back->broken)
9240                         mismatch = 1;
9241
9242                 if (back->broken) {
9243                         entry->broken++;
9244                         broken_entries++;
9245                 }
9246
9247                 entry->count++;
9248         }
9249
9250         /* Yay all the backrefs agree, carry on good sir */
9251         if (nr_entries <= 1 && !mismatch)
9252                 goto out;
9253
9254         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9255                 "%Lu\n", rec->start);
9256
9257         /*
9258          * First we want to see if the backrefs can agree amongst themselves who
9259          * is right, so figure out which one of the entries has the highest
9260          * count.
9261          */
9262         best = find_most_right_entry(&entries);
9263
9264         /*
9265          * Ok so we may have an even split between what the backrefs think, so
9266          * this is where we use the extent ref to see what it thinks.
9267          */
9268         if (!best) {
9269                 entry = find_entry(&entries, rec->start, rec->nr);
9270                 if (!entry && (!broken_entries || !rec->found_rec)) {
9271                         fprintf(stderr, "Backrefs don't agree with each other "
9272                                 "and extent record doesn't agree with anybody,"
9273                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9274                                 rec->start, rec->nr);
9275                         ret = -EINVAL;
9276                         goto out;
9277                 } else if (!entry) {
9278                         /*
9279                          * Ok our backrefs were broken, we'll assume this is the
9280                          * correct value and add an entry for this range.
9281                          */
9282                         entry = malloc(sizeof(struct extent_entry));
9283                         if (!entry) {
9284                                 ret = -ENOMEM;
9285                                 goto out;
9286                         }
9287                         memset(entry, 0, sizeof(*entry));
9288                         entry->bytenr = rec->start;
9289                         entry->bytes = rec->nr;
9290                         list_add_tail(&entry->list, &entries);
9291                         nr_entries++;
9292                 }
9293                 entry->count++;
9294                 best = find_most_right_entry(&entries);
9295                 if (!best) {
9296                         fprintf(stderr, "Backrefs and extent record evenly "
9297                                 "split on who is right, this is going to "
9298                                 "require user input to fix bytenr %Lu bytes "
9299                                 "%Lu\n", rec->start, rec->nr);
9300                         ret = -EINVAL;
9301                         goto out;
9302                 }
9303         }
9304
9305         /*
9306          * I don't think this can happen currently as we'll abort() if we catch
9307          * this case higher up, but in case somebody removes that we still can't
9308          * deal with it properly here yet, so just bail out of that's the case.
9309          */
9310         if (best->bytenr != rec->start) {
9311                 fprintf(stderr, "Extent start and backref starts don't match, "
9312                         "please use btrfs-image on this file system and send "
9313                         "it to a btrfs developer so they can make fsck fix "
9314                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9315                         rec->start, rec->nr);
9316                 ret = -EINVAL;
9317                 goto out;
9318         }
9319
9320         /*
9321          * Ok great we all agreed on an extent record, let's go find the real
9322          * references and fix up the ones that don't match.
9323          */
9324         rbtree_postorder_for_each_entry_safe(back, tmp,
9325                                              &rec->backref_tree, node) {
9326                 if (back->full_backref || !back->is_data)
9327                         continue;
9328
9329                 dback = to_data_backref(back);
9330
9331                 /*
9332                  * Still ignoring backrefs that don't have a real ref attached
9333                  * to them.
9334                  */
9335                 if (dback->found_ref == 0)
9336                         continue;
9337
9338                 if (dback->bytes == best->bytes &&
9339                     dback->disk_bytenr == best->bytenr)
9340                         continue;
9341
9342                 ret = repair_ref(info, path, dback, best);
9343                 if (ret)
9344                         goto out;
9345         }
9346
9347         /*
9348          * Ok we messed with the actual refs, which means we need to drop our
9349          * entire cache and go back and rescan.  I know this is a huge pain and
9350          * adds a lot of extra work, but it's the only way to be safe.  Once all
9351          * the backrefs agree we may not need to do anything to the extent
9352          * record itself.
9353          */
9354         ret = -EAGAIN;
9355 out:
9356         while (!list_empty(&entries)) {
9357                 entry = list_entry(entries.next, struct extent_entry, list);
9358                 list_del_init(&entry->list);
9359                 free(entry);
9360         }
9361         return ret;
9362 }
9363
9364 static int process_duplicates(struct cache_tree *extent_cache,
9365                               struct extent_record *rec)
9366 {
9367         struct extent_record *good, *tmp;
9368         struct cache_extent *cache;
9369         int ret;
9370
9371         /*
9372          * If we found a extent record for this extent then return, or if we
9373          * have more than one duplicate we are likely going to need to delete
9374          * something.
9375          */
9376         if (rec->found_rec || rec->num_duplicates > 1)
9377                 return 0;
9378
9379         /* Shouldn't happen but just in case */
9380         BUG_ON(!rec->num_duplicates);
9381
9382         /*
9383          * So this happens if we end up with a backref that doesn't match the
9384          * actual extent entry.  So either the backref is bad or the extent
9385          * entry is bad.  Either way we want to have the extent_record actually
9386          * reflect what we found in the extent_tree, so we need to take the
9387          * duplicate out and use that as the extent_record since the only way we
9388          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9389          */
9390         remove_cache_extent(extent_cache, &rec->cache);
9391
9392         good = to_extent_record(rec->dups.next);
9393         list_del_init(&good->list);
9394         INIT_LIST_HEAD(&good->backrefs);
9395         INIT_LIST_HEAD(&good->dups);
9396         good->cache.start = good->start;
9397         good->cache.size = good->nr;
9398         good->content_checked = 0;
9399         good->owner_ref_checked = 0;
9400         good->num_duplicates = 0;
9401         good->refs = rec->refs;
9402         list_splice_init(&rec->backrefs, &good->backrefs);
9403         while (1) {
9404                 cache = lookup_cache_extent(extent_cache, good->start,
9405                                             good->nr);
9406                 if (!cache)
9407                         break;
9408                 tmp = container_of(cache, struct extent_record, cache);
9409
9410                 /*
9411                  * If we find another overlapping extent and it's found_rec is
9412                  * set then it's a duplicate and we need to try and delete
9413                  * something.
9414                  */
9415                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9416                         if (list_empty(&good->list))
9417                                 list_add_tail(&good->list,
9418                                               &duplicate_extents);
9419                         good->num_duplicates += tmp->num_duplicates + 1;
9420                         list_splice_init(&tmp->dups, &good->dups);
9421                         list_del_init(&tmp->list);
9422                         list_add_tail(&tmp->list, &good->dups);
9423                         remove_cache_extent(extent_cache, &tmp->cache);
9424                         continue;
9425                 }
9426
9427                 /*
9428                  * Ok we have another non extent item backed extent rec, so lets
9429                  * just add it to this extent and carry on like we did above.
9430                  */
9431                 good->refs += tmp->refs;
9432                 list_splice_init(&tmp->backrefs, &good->backrefs);
9433                 remove_cache_extent(extent_cache, &tmp->cache);
9434                 free(tmp);
9435         }
9436         ret = insert_cache_extent(extent_cache, &good->cache);
9437         BUG_ON(ret);
9438         free(rec);
9439         return good->num_duplicates ? 0 : 1;
9440 }
9441
9442 static int delete_duplicate_records(struct btrfs_root *root,
9443                                     struct extent_record *rec)
9444 {
9445         struct btrfs_trans_handle *trans;
9446         LIST_HEAD(delete_list);
9447         struct btrfs_path path;
9448         struct extent_record *tmp, *good, *n;
9449         int nr_del = 0;
9450         int ret = 0, err;
9451         struct btrfs_key key;
9452
9453         btrfs_init_path(&path);
9454
9455         good = rec;
9456         /* Find the record that covers all of the duplicates. */
9457         list_for_each_entry(tmp, &rec->dups, list) {
9458                 if (good->start < tmp->start)
9459                         continue;
9460                 if (good->nr > tmp->nr)
9461                         continue;
9462
9463                 if (tmp->start + tmp->nr < good->start + good->nr) {
9464                         fprintf(stderr, "Ok we have overlapping extents that "
9465                                 "aren't completely covered by each other, this "
9466                                 "is going to require more careful thought.  "
9467                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9468                                 tmp->start, tmp->nr, good->start, good->nr);
9469                         abort();
9470                 }
9471                 good = tmp;
9472         }
9473
9474         if (good != rec)
9475                 list_add_tail(&rec->list, &delete_list);
9476
9477         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9478                 if (tmp == good)
9479                         continue;
9480                 list_move_tail(&tmp->list, &delete_list);
9481         }
9482
9483         root = root->fs_info->extent_root;
9484         trans = btrfs_start_transaction(root, 1);
9485         if (IS_ERR(trans)) {
9486                 ret = PTR_ERR(trans);
9487                 goto out;
9488         }
9489
9490         list_for_each_entry(tmp, &delete_list, list) {
9491                 if (tmp->found_rec == 0)
9492                         continue;
9493                 key.objectid = tmp->start;
9494                 key.type = BTRFS_EXTENT_ITEM_KEY;
9495                 key.offset = tmp->nr;
9496
9497                 /* Shouldn't happen but just in case */
9498                 if (tmp->metadata) {
9499                         fprintf(stderr, "Well this shouldn't happen, extent "
9500                                 "record overlaps but is metadata? "
9501                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9502                         abort();
9503                 }
9504
9505                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9506                 if (ret) {
9507                         if (ret > 0)
9508                                 ret = -EINVAL;
9509                         break;
9510                 }
9511                 ret = btrfs_del_item(trans, root, &path);
9512                 if (ret)
9513                         break;
9514                 btrfs_release_path(&path);
9515                 nr_del++;
9516         }
9517         err = btrfs_commit_transaction(trans, root);
9518         if (err && !ret)
9519                 ret = err;
9520 out:
9521         while (!list_empty(&delete_list)) {
9522                 tmp = to_extent_record(delete_list.next);
9523                 list_del_init(&tmp->list);
9524                 if (tmp == rec)
9525                         continue;
9526                 free(tmp);
9527         }
9528
9529         while (!list_empty(&rec->dups)) {
9530                 tmp = to_extent_record(rec->dups.next);
9531                 list_del_init(&tmp->list);
9532                 free(tmp);
9533         }
9534
9535         btrfs_release_path(&path);
9536
9537         if (!ret && !nr_del)
9538                 rec->num_duplicates = 0;
9539
9540         return ret ? ret : nr_del;
9541 }
9542
9543 static int find_possible_backrefs(struct btrfs_fs_info *info,
9544                                   struct btrfs_path *path,
9545                                   struct cache_tree *extent_cache,
9546                                   struct extent_record *rec)
9547 {
9548         struct btrfs_root *root;
9549         struct extent_backref *back, *tmp;
9550         struct data_backref *dback;
9551         struct cache_extent *cache;
9552         struct btrfs_file_extent_item *fi;
9553         struct btrfs_key key;
9554         u64 bytenr, bytes;
9555         int ret;
9556
9557         rbtree_postorder_for_each_entry_safe(back, tmp,
9558                                              &rec->backref_tree, node) {
9559                 /* Don't care about full backrefs (poor unloved backrefs) */
9560                 if (back->full_backref || !back->is_data)
9561                         continue;
9562
9563                 dback = to_data_backref(back);
9564
9565                 /* We found this one, we don't need to do a lookup */
9566                 if (dback->found_ref)
9567                         continue;
9568
9569                 key.objectid = dback->root;
9570                 key.type = BTRFS_ROOT_ITEM_KEY;
9571                 key.offset = (u64)-1;
9572
9573                 root = btrfs_read_fs_root(info, &key);
9574
9575                 /* No root, definitely a bad ref, skip */
9576                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9577                         continue;
9578                 /* Other err, exit */
9579                 if (IS_ERR(root))
9580                         return PTR_ERR(root);
9581
9582                 key.objectid = dback->owner;
9583                 key.type = BTRFS_EXTENT_DATA_KEY;
9584                 key.offset = dback->offset;
9585                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9586                 if (ret) {
9587                         btrfs_release_path(path);
9588                         if (ret < 0)
9589                                 return ret;
9590                         /* Didn't find it, we can carry on */
9591                         ret = 0;
9592                         continue;
9593                 }
9594
9595                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9596                                     struct btrfs_file_extent_item);
9597                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9598                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9599                 btrfs_release_path(path);
9600                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9601                 if (cache) {
9602                         struct extent_record *tmp;
9603                         tmp = container_of(cache, struct extent_record, cache);
9604
9605                         /*
9606                          * If we found an extent record for the bytenr for this
9607                          * particular backref then we can't add it to our
9608                          * current extent record.  We only want to add backrefs
9609                          * that don't have a corresponding extent item in the
9610                          * extent tree since they likely belong to this record
9611                          * and we need to fix it if it doesn't match bytenrs.
9612                          */
9613                         if  (tmp->found_rec)
9614                                 continue;
9615                 }
9616
9617                 dback->found_ref += 1;
9618                 dback->disk_bytenr = bytenr;
9619                 dback->bytes = bytes;
9620
9621                 /*
9622                  * Set this so the verify backref code knows not to trust the
9623                  * values in this backref.
9624                  */
9625                 back->broken = 1;
9626         }
9627
9628         return 0;
9629 }
9630
9631 /*
9632  * Record orphan data ref into corresponding root.
9633  *
9634  * Return 0 if the extent item contains data ref and recorded.
9635  * Return 1 if the extent item contains no useful data ref
9636  *   On that case, it may contains only shared_dataref or metadata backref
9637  *   or the file extent exists(this should be handled by the extent bytenr
9638  *   recovery routine)
9639  * Return <0 if something goes wrong.
9640  */
9641 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9642                                       struct extent_record *rec)
9643 {
9644         struct btrfs_key key;
9645         struct btrfs_root *dest_root;
9646         struct extent_backref *back, *tmp;
9647         struct data_backref *dback;
9648         struct orphan_data_extent *orphan;
9649         struct btrfs_path path;
9650         int recorded_data_ref = 0;
9651         int ret = 0;
9652
9653         if (rec->metadata)
9654                 return 1;
9655         btrfs_init_path(&path);
9656         rbtree_postorder_for_each_entry_safe(back, tmp,
9657                                              &rec->backref_tree, node) {
9658                 if (back->full_backref || !back->is_data ||
9659                     !back->found_extent_tree)
9660                         continue;
9661                 dback = to_data_backref(back);
9662                 if (dback->found_ref)
9663                         continue;
9664                 key.objectid = dback->root;
9665                 key.type = BTRFS_ROOT_ITEM_KEY;
9666                 key.offset = (u64)-1;
9667
9668                 dest_root = btrfs_read_fs_root(fs_info, &key);
9669
9670                 /* For non-exist root we just skip it */
9671                 if (IS_ERR(dest_root) || !dest_root)
9672                         continue;
9673
9674                 key.objectid = dback->owner;
9675                 key.type = BTRFS_EXTENT_DATA_KEY;
9676                 key.offset = dback->offset;
9677
9678                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9679                 btrfs_release_path(&path);
9680                 /*
9681                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9682                  * we need to record it for inode/file extent rebuild.
9683                  * For ret > 0, we record it only for file extent rebuild.
9684                  * For ret == 0, the file extent exists but only bytenr
9685                  * mismatch, let the original bytenr fix routine to handle,
9686                  * don't record it.
9687                  */
9688                 if (ret == 0)
9689                         continue;
9690                 ret = 0;
9691                 orphan = malloc(sizeof(*orphan));
9692                 if (!orphan) {
9693                         ret = -ENOMEM;
9694                         goto out;
9695                 }
9696                 INIT_LIST_HEAD(&orphan->list);
9697                 orphan->root = dback->root;
9698                 orphan->objectid = dback->owner;
9699                 orphan->offset = dback->offset;
9700                 orphan->disk_bytenr = rec->cache.start;
9701                 orphan->disk_len = rec->cache.size;
9702                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9703                 recorded_data_ref = 1;
9704         }
9705 out:
9706         btrfs_release_path(&path);
9707         if (!ret)
9708                 return !recorded_data_ref;
9709         else
9710                 return ret;
9711 }
9712
9713 /*
9714  * when an incorrect extent item is found, this will delete
9715  * all of the existing entries for it and recreate them
9716  * based on what the tree scan found.
9717  */
9718 static int fixup_extent_refs(struct btrfs_fs_info *info,
9719                              struct cache_tree *extent_cache,
9720                              struct extent_record *rec)
9721 {
9722         struct btrfs_trans_handle *trans = NULL;
9723         int ret;
9724         struct btrfs_path path;
9725         struct cache_extent *cache;
9726         struct extent_backref *back, *tmp;
9727         int allocated = 0;
9728         u64 flags = 0;
9729
9730         if (rec->flag_block_full_backref)
9731                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9732
9733         btrfs_init_path(&path);
9734         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9735                 /*
9736                  * Sometimes the backrefs themselves are so broken they don't
9737                  * get attached to any meaningful rec, so first go back and
9738                  * check any of our backrefs that we couldn't find and throw
9739                  * them into the list if we find the backref so that
9740                  * verify_backrefs can figure out what to do.
9741                  */
9742                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9743                 if (ret < 0)
9744                         goto out;
9745         }
9746
9747         /* step one, make sure all of the backrefs agree */
9748         ret = verify_backrefs(info, &path, rec);
9749         if (ret < 0)
9750                 goto out;
9751
9752         trans = btrfs_start_transaction(info->extent_root, 1);
9753         if (IS_ERR(trans)) {
9754                 ret = PTR_ERR(trans);
9755                 goto out;
9756         }
9757
9758         /* step two, delete all the existing records */
9759         ret = delete_extent_records(trans, info->extent_root, &path,
9760                                     rec->start);
9761
9762         if (ret < 0)
9763                 goto out;
9764
9765         /* was this block corrupt?  If so, don't add references to it */
9766         cache = lookup_cache_extent(info->corrupt_blocks,
9767                                     rec->start, rec->max_size);
9768         if (cache) {
9769                 ret = 0;
9770                 goto out;
9771         }
9772
9773         /* step three, recreate all the refs we did find */
9774         rbtree_postorder_for_each_entry_safe(back, tmp,
9775                                              &rec->backref_tree, node) {
9776                 /*
9777                  * if we didn't find any references, don't create a
9778                  * new extent record
9779                  */
9780                 if (!back->found_ref)
9781                         continue;
9782
9783                 rec->bad_full_backref = 0;
9784                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9785                 allocated = 1;
9786
9787                 if (ret)
9788                         goto out;
9789         }
9790 out:
9791         if (trans) {
9792                 int err = btrfs_commit_transaction(trans, info->extent_root);
9793                 if (!ret)
9794                         ret = err;
9795         }
9796
9797         if (!ret)
9798                 fprintf(stderr, "Repaired extent references for %llu\n",
9799                                 (unsigned long long)rec->start);
9800
9801         btrfs_release_path(&path);
9802         return ret;
9803 }
9804
9805 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9806                               struct extent_record *rec)
9807 {
9808         struct btrfs_trans_handle *trans;
9809         struct btrfs_root *root = fs_info->extent_root;
9810         struct btrfs_path path;
9811         struct btrfs_extent_item *ei;
9812         struct btrfs_key key;
9813         u64 flags;
9814         int ret = 0;
9815
9816         key.objectid = rec->start;
9817         if (rec->metadata) {
9818                 key.type = BTRFS_METADATA_ITEM_KEY;
9819                 key.offset = rec->info_level;
9820         } else {
9821                 key.type = BTRFS_EXTENT_ITEM_KEY;
9822                 key.offset = rec->max_size;
9823         }
9824
9825         trans = btrfs_start_transaction(root, 0);
9826         if (IS_ERR(trans))
9827                 return PTR_ERR(trans);
9828
9829         btrfs_init_path(&path);
9830         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9831         if (ret < 0) {
9832                 btrfs_release_path(&path);
9833                 btrfs_commit_transaction(trans, root);
9834                 return ret;
9835         } else if (ret) {
9836                 fprintf(stderr, "Didn't find extent for %llu\n",
9837                         (unsigned long long)rec->start);
9838                 btrfs_release_path(&path);
9839                 btrfs_commit_transaction(trans, root);
9840                 return -ENOENT;
9841         }
9842
9843         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9844                             struct btrfs_extent_item);
9845         flags = btrfs_extent_flags(path.nodes[0], ei);
9846         if (rec->flag_block_full_backref) {
9847                 fprintf(stderr, "setting full backref on %llu\n",
9848                         (unsigned long long)key.objectid);
9849                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9850         } else {
9851                 fprintf(stderr, "clearing full backref on %llu\n",
9852                         (unsigned long long)key.objectid);
9853                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9854         }
9855         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9856         btrfs_mark_buffer_dirty(path.nodes[0]);
9857         btrfs_release_path(&path);
9858         ret = btrfs_commit_transaction(trans, root);
9859         if (!ret)
9860                 fprintf(stderr, "Repaired extent flags for %llu\n",
9861                                 (unsigned long long)rec->start);
9862
9863         return ret;
9864 }
9865
9866 /* right now we only prune from the extent allocation tree */
9867 static int prune_one_block(struct btrfs_trans_handle *trans,
9868                            struct btrfs_fs_info *info,
9869                            struct btrfs_corrupt_block *corrupt)
9870 {
9871         int ret;
9872         struct btrfs_path path;
9873         struct extent_buffer *eb;
9874         u64 found;
9875         int slot;
9876         int nritems;
9877         int level = corrupt->level + 1;
9878
9879         btrfs_init_path(&path);
9880 again:
9881         /* we want to stop at the parent to our busted block */
9882         path.lowest_level = level;
9883
9884         ret = btrfs_search_slot(trans, info->extent_root,
9885                                 &corrupt->key, &path, -1, 1);
9886
9887         if (ret < 0)
9888                 goto out;
9889
9890         eb = path.nodes[level];
9891         if (!eb) {
9892                 ret = -ENOENT;
9893                 goto out;
9894         }
9895
9896         /*
9897          * hopefully the search gave us the block we want to prune,
9898          * lets try that first
9899          */
9900         slot = path.slots[level];
9901         found =  btrfs_node_blockptr(eb, slot);
9902         if (found == corrupt->cache.start)
9903                 goto del_ptr;
9904
9905         nritems = btrfs_header_nritems(eb);
9906
9907         /* the search failed, lets scan this node and hope we find it */
9908         for (slot = 0; slot < nritems; slot++) {
9909                 found =  btrfs_node_blockptr(eb, slot);
9910                 if (found == corrupt->cache.start)
9911                         goto del_ptr;
9912         }
9913         /*
9914          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9915          * to this block
9916          */
9917         if (eb == info->extent_root->node) {
9918                 ret = -ENOENT;
9919                 goto out;
9920         } else {
9921                 level++;
9922                 btrfs_release_path(&path);
9923                 goto again;
9924         }
9925
9926 del_ptr:
9927         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9928         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9929
9930 out:
9931         btrfs_release_path(&path);
9932         return ret;
9933 }
9934
9935 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9936 {
9937         struct btrfs_trans_handle *trans = NULL;
9938         struct cache_extent *cache;
9939         struct btrfs_corrupt_block *corrupt;
9940
9941         while (1) {
9942                 cache = search_cache_extent(info->corrupt_blocks, 0);
9943                 if (!cache)
9944                         break;
9945                 if (!trans) {
9946                         trans = btrfs_start_transaction(info->extent_root, 1);
9947                         if (IS_ERR(trans))
9948                                 return PTR_ERR(trans);
9949                 }
9950                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9951                 prune_one_block(trans, info, corrupt);
9952                 remove_cache_extent(info->corrupt_blocks, cache);
9953         }
9954         if (trans)
9955                 return btrfs_commit_transaction(trans, info->extent_root);
9956         return 0;
9957 }
9958
9959 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9960 {
9961         struct btrfs_block_group_cache *cache;
9962         u64 start, end;
9963         int ret;
9964
9965         while (1) {
9966                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9967                                             &start, &end, EXTENT_DIRTY);
9968                 if (ret)
9969                         break;
9970                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9971         }
9972
9973         start = 0;
9974         while (1) {
9975                 cache = btrfs_lookup_first_block_group(fs_info, start);
9976                 if (!cache)
9977                         break;
9978                 if (cache->cached)
9979                         cache->cached = 0;
9980                 start = cache->key.objectid + cache->key.offset;
9981         }
9982 }
9983
9984 static int check_extent_refs(struct btrfs_root *root,
9985                              struct cache_tree *extent_cache)
9986 {
9987         struct extent_record *rec;
9988         struct cache_extent *cache;
9989         int ret = 0;
9990         int had_dups = 0;
9991
9992         if (repair) {
9993                 /*
9994                  * if we're doing a repair, we have to make sure
9995                  * we don't allocate from the problem extents.
9996                  * In the worst case, this will be all the
9997                  * extents in the FS
9998                  */
9999                 cache = search_cache_extent(extent_cache, 0);
10000                 while(cache) {
10001                         rec = container_of(cache, struct extent_record, cache);
10002                         set_extent_dirty(root->fs_info->excluded_extents,
10003                                          rec->start,
10004                                          rec->start + rec->max_size - 1);
10005                         cache = next_cache_extent(cache);
10006                 }
10007
10008                 /* pin down all the corrupted blocks too */
10009                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10010                 while(cache) {
10011                         set_extent_dirty(root->fs_info->excluded_extents,
10012                                          cache->start,
10013                                          cache->start + cache->size - 1);
10014                         cache = next_cache_extent(cache);
10015                 }
10016                 prune_corrupt_blocks(root->fs_info);
10017                 reset_cached_block_groups(root->fs_info);
10018         }
10019
10020         reset_cached_block_groups(root->fs_info);
10021
10022         /*
10023          * We need to delete any duplicate entries we find first otherwise we
10024          * could mess up the extent tree when we have backrefs that actually
10025          * belong to a different extent item and not the weird duplicate one.
10026          */
10027         while (repair && !list_empty(&duplicate_extents)) {
10028                 rec = to_extent_record(duplicate_extents.next);
10029                 list_del_init(&rec->list);
10030
10031                 /* Sometimes we can find a backref before we find an actual
10032                  * extent, so we need to process it a little bit to see if there
10033                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10034                  * if this is a backref screwup.  If we need to delete stuff
10035                  * process_duplicates() will return 0, otherwise it will return
10036                  * 1 and we
10037                  */
10038                 if (process_duplicates(extent_cache, rec))
10039                         continue;
10040                 ret = delete_duplicate_records(root, rec);
10041                 if (ret < 0)
10042                         return ret;
10043                 /*
10044                  * delete_duplicate_records will return the number of entries
10045                  * deleted, so if it's greater than 0 then we know we actually
10046                  * did something and we need to remove.
10047                  */
10048                 if (ret)
10049                         had_dups = 1;
10050         }
10051
10052         if (had_dups)
10053                 return -EAGAIN;
10054
10055         while(1) {
10056                 int cur_err = 0;
10057                 int fix = 0;
10058
10059                 cache = search_cache_extent(extent_cache, 0);
10060                 if (!cache)
10061                         break;
10062                 rec = container_of(cache, struct extent_record, cache);
10063                 if (rec->num_duplicates) {
10064                         fprintf(stderr, "extent item %llu has multiple extent "
10065                                 "items\n", (unsigned long long)rec->start);
10066                         cur_err = 1;
10067                 }
10068
10069                 if (rec->refs != rec->extent_item_refs) {
10070                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10071                                 (unsigned long long)rec->start,
10072                                 (unsigned long long)rec->nr);
10073                         fprintf(stderr, "extent item %llu, found %llu\n",
10074                                 (unsigned long long)rec->extent_item_refs,
10075                                 (unsigned long long)rec->refs);
10076                         ret = record_orphan_data_extents(root->fs_info, rec);
10077                         if (ret < 0)
10078                                 goto repair_abort;
10079                         fix = ret;
10080                         cur_err = 1;
10081                 }
10082                 if (all_backpointers_checked(rec, 1)) {
10083                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10084                                 (unsigned long long)rec->start,
10085                                 (unsigned long long)rec->nr);
10086                         fix = 1;
10087                         cur_err = 1;
10088                 }
10089                 if (!rec->owner_ref_checked) {
10090                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10091                                 (unsigned long long)rec->start,
10092                                 (unsigned long long)rec->nr);
10093                         fix = 1;
10094                         cur_err = 1;
10095                 }
10096
10097                 if (repair && fix) {
10098                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10099                         if (ret)
10100                                 goto repair_abort;
10101                 }
10102
10103
10104                 if (rec->bad_full_backref) {
10105                         fprintf(stderr, "bad full backref, on [%llu]\n",
10106                                 (unsigned long long)rec->start);
10107                         if (repair) {
10108                                 ret = fixup_extent_flags(root->fs_info, rec);
10109                                 if (ret)
10110                                         goto repair_abort;
10111                                 fix = 1;
10112                         }
10113                         cur_err = 1;
10114                 }
10115                 /*
10116                  * Although it's not a extent ref's problem, we reuse this
10117                  * routine for error reporting.
10118                  * No repair function yet.
10119                  */
10120                 if (rec->crossing_stripes) {
10121                         fprintf(stderr,
10122                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10123                                 rec->start, rec->start + rec->max_size);
10124                         cur_err = 1;
10125                 }
10126
10127                 if (rec->wrong_chunk_type) {
10128                         fprintf(stderr,
10129                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10130                                 rec->start, rec->start + rec->max_size);
10131                         cur_err = 1;
10132                 }
10133
10134                 remove_cache_extent(extent_cache, cache);
10135                 free_all_extent_backrefs(rec);
10136                 if (!init_extent_tree && repair && (!cur_err || fix))
10137                         clear_extent_dirty(root->fs_info->excluded_extents,
10138                                            rec->start,
10139                                            rec->start + rec->max_size - 1);
10140                 free(rec);
10141         }
10142 repair_abort:
10143         if (repair) {
10144                 if (ret && ret != -EAGAIN) {
10145                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10146                         exit(1);
10147                 } else if (!ret) {
10148                         struct btrfs_trans_handle *trans;
10149
10150                         root = root->fs_info->extent_root;
10151                         trans = btrfs_start_transaction(root, 1);
10152                         if (IS_ERR(trans)) {
10153                                 ret = PTR_ERR(trans);
10154                                 goto repair_abort;
10155                         }
10156
10157                         ret = btrfs_fix_block_accounting(trans, root);
10158                         if (ret)
10159                                 goto repair_abort;
10160                         ret = btrfs_commit_transaction(trans, root);
10161                         if (ret)
10162                                 goto repair_abort;
10163                 }
10164                 return ret;
10165         }
10166         return 0;
10167 }
10168
10169 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10170 {
10171         u64 stripe_size;
10172
10173         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10174                 stripe_size = length;
10175                 stripe_size /= num_stripes;
10176         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10177                 stripe_size = length * 2;
10178                 stripe_size /= num_stripes;
10179         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10180                 stripe_size = length;
10181                 stripe_size /= (num_stripes - 1);
10182         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10183                 stripe_size = length;
10184                 stripe_size /= (num_stripes - 2);
10185         } else {
10186                 stripe_size = length;
10187         }
10188         return stripe_size;
10189 }
10190
10191 /*
10192  * Check the chunk with its block group/dev list ref:
10193  * Return 0 if all refs seems valid.
10194  * Return 1 if part of refs seems valid, need later check for rebuild ref
10195  * like missing block group and needs to search extent tree to rebuild them.
10196  * Return -1 if essential refs are missing and unable to rebuild.
10197  */
10198 static int check_chunk_refs(struct chunk_record *chunk_rec,
10199                             struct block_group_tree *block_group_cache,
10200                             struct device_extent_tree *dev_extent_cache,
10201                             int silent)
10202 {
10203         struct cache_extent *block_group_item;
10204         struct block_group_record *block_group_rec;
10205         struct cache_extent *dev_extent_item;
10206         struct device_extent_record *dev_extent_rec;
10207         u64 devid;
10208         u64 offset;
10209         u64 length;
10210         int metadump_v2 = 0;
10211         int i;
10212         int ret = 0;
10213
10214         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10215                                                chunk_rec->offset,
10216                                                chunk_rec->length);
10217         if (block_group_item) {
10218                 block_group_rec = container_of(block_group_item,
10219                                                struct block_group_record,
10220                                                cache);
10221                 if (chunk_rec->length != block_group_rec->offset ||
10222                     chunk_rec->offset != block_group_rec->objectid ||
10223                     (!metadump_v2 &&
10224                      chunk_rec->type_flags != block_group_rec->flags)) {
10225                         if (!silent)
10226                                 fprintf(stderr,
10227                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10228                                         chunk_rec->objectid,
10229                                         chunk_rec->type,
10230                                         chunk_rec->offset,
10231                                         chunk_rec->length,
10232                                         chunk_rec->offset,
10233                                         chunk_rec->type_flags,
10234                                         block_group_rec->objectid,
10235                                         block_group_rec->type,
10236                                         block_group_rec->offset,
10237                                         block_group_rec->offset,
10238                                         block_group_rec->objectid,
10239                                         block_group_rec->flags);
10240                         ret = -1;
10241                 } else {
10242                         list_del_init(&block_group_rec->list);
10243                         chunk_rec->bg_rec = block_group_rec;
10244                 }
10245         } else {
10246                 if (!silent)
10247                         fprintf(stderr,
10248                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10249                                 chunk_rec->objectid,
10250                                 chunk_rec->type,
10251                                 chunk_rec->offset,
10252                                 chunk_rec->length,
10253                                 chunk_rec->offset,
10254                                 chunk_rec->type_flags);
10255                 ret = 1;
10256         }
10257
10258         if (metadump_v2)
10259                 return ret;
10260
10261         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10262                                     chunk_rec->num_stripes);
10263         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10264                 devid = chunk_rec->stripes[i].devid;
10265                 offset = chunk_rec->stripes[i].offset;
10266                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10267                                                        devid, offset, length);
10268                 if (dev_extent_item) {
10269                         dev_extent_rec = container_of(dev_extent_item,
10270                                                 struct device_extent_record,
10271                                                 cache);
10272                         if (dev_extent_rec->objectid != devid ||
10273                             dev_extent_rec->offset != offset ||
10274                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10275                             dev_extent_rec->length != length) {
10276                                 if (!silent)
10277                                         fprintf(stderr,
10278                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10279                                                 chunk_rec->objectid,
10280                                                 chunk_rec->type,
10281                                                 chunk_rec->offset,
10282                                                 chunk_rec->stripes[i].devid,
10283                                                 chunk_rec->stripes[i].offset,
10284                                                 dev_extent_rec->objectid,
10285                                                 dev_extent_rec->offset,
10286                                                 dev_extent_rec->length);
10287                                 ret = -1;
10288                         } else {
10289                                 list_move(&dev_extent_rec->chunk_list,
10290                                           &chunk_rec->dextents);
10291                         }
10292                 } else {
10293                         if (!silent)
10294                                 fprintf(stderr,
10295                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10296                                         chunk_rec->objectid,
10297                                         chunk_rec->type,
10298                                         chunk_rec->offset,
10299                                         chunk_rec->stripes[i].devid,
10300                                         chunk_rec->stripes[i].offset);
10301                         ret = -1;
10302                 }
10303         }
10304         return ret;
10305 }
10306
10307 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10308 int check_chunks(struct cache_tree *chunk_cache,
10309                  struct block_group_tree *block_group_cache,
10310                  struct device_extent_tree *dev_extent_cache,
10311                  struct list_head *good, struct list_head *bad,
10312                  struct list_head *rebuild, int silent)
10313 {
10314         struct cache_extent *chunk_item;
10315         struct chunk_record *chunk_rec;
10316         struct block_group_record *bg_rec;
10317         struct device_extent_record *dext_rec;
10318         int err;
10319         int ret = 0;
10320
10321         chunk_item = first_cache_extent(chunk_cache);
10322         while (chunk_item) {
10323                 chunk_rec = container_of(chunk_item, struct chunk_record,
10324                                          cache);
10325                 err = check_chunk_refs(chunk_rec, block_group_cache,
10326                                        dev_extent_cache, silent);
10327                 if (err < 0)
10328                         ret = err;
10329                 if (err == 0 && good)
10330                         list_add_tail(&chunk_rec->list, good);
10331                 if (err > 0 && rebuild)
10332                         list_add_tail(&chunk_rec->list, rebuild);
10333                 if (err < 0 && bad)
10334                         list_add_tail(&chunk_rec->list, bad);
10335                 chunk_item = next_cache_extent(chunk_item);
10336         }
10337
10338         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10339                 if (!silent)
10340                         fprintf(stderr,
10341                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10342                                 bg_rec->objectid,
10343                                 bg_rec->offset,
10344                                 bg_rec->flags);
10345                 if (!ret)
10346                         ret = 1;
10347         }
10348
10349         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10350                             chunk_list) {
10351                 if (!silent)
10352                         fprintf(stderr,
10353                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10354                                 dext_rec->objectid,
10355                                 dext_rec->offset,
10356                                 dext_rec->length);
10357                 if (!ret)
10358                         ret = 1;
10359         }
10360         return ret;
10361 }
10362
10363
10364 static int check_device_used(struct device_record *dev_rec,
10365                              struct device_extent_tree *dext_cache)
10366 {
10367         struct cache_extent *cache;
10368         struct device_extent_record *dev_extent_rec;
10369         u64 total_byte = 0;
10370
10371         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10372         while (cache) {
10373                 dev_extent_rec = container_of(cache,
10374                                               struct device_extent_record,
10375                                               cache);
10376                 if (dev_extent_rec->objectid != dev_rec->devid)
10377                         break;
10378
10379                 list_del_init(&dev_extent_rec->device_list);
10380                 total_byte += dev_extent_rec->length;
10381                 cache = next_cache_extent(cache);
10382         }
10383
10384         if (total_byte != dev_rec->byte_used) {
10385                 fprintf(stderr,
10386                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10387                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10388                         dev_rec->type, dev_rec->offset);
10389                 return -1;
10390         } else {
10391                 return 0;
10392         }
10393 }
10394
10395 /* check btrfs_dev_item -> btrfs_dev_extent */
10396 static int check_devices(struct rb_root *dev_cache,
10397                          struct device_extent_tree *dev_extent_cache)
10398 {
10399         struct rb_node *dev_node;
10400         struct device_record *dev_rec;
10401         struct device_extent_record *dext_rec;
10402         int err;
10403         int ret = 0;
10404
10405         dev_node = rb_first(dev_cache);
10406         while (dev_node) {
10407                 dev_rec = container_of(dev_node, struct device_record, node);
10408                 err = check_device_used(dev_rec, dev_extent_cache);
10409                 if (err)
10410                         ret = err;
10411
10412                 dev_node = rb_next(dev_node);
10413         }
10414         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10415                             device_list) {
10416                 fprintf(stderr,
10417                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10418                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10419                 if (!ret)
10420                         ret = 1;
10421         }
10422         return ret;
10423 }
10424
10425 static int add_root_item_to_list(struct list_head *head,
10426                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10427                                   u8 level, u8 drop_level,
10428                                   struct btrfs_key *drop_key)
10429 {
10430
10431         struct root_item_record *ri_rec;
10432         ri_rec = malloc(sizeof(*ri_rec));
10433         if (!ri_rec)
10434                 return -ENOMEM;
10435         ri_rec->bytenr = bytenr;
10436         ri_rec->objectid = objectid;
10437         ri_rec->level = level;
10438         ri_rec->drop_level = drop_level;
10439         ri_rec->last_snapshot = last_snapshot;
10440         if (drop_key)
10441                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10442         list_add_tail(&ri_rec->list, head);
10443
10444         return 0;
10445 }
10446
10447 static void free_root_item_list(struct list_head *list)
10448 {
10449         struct root_item_record *ri_rec;
10450
10451         while (!list_empty(list)) {
10452                 ri_rec = list_first_entry(list, struct root_item_record,
10453                                           list);
10454                 list_del_init(&ri_rec->list);
10455                 free(ri_rec);
10456         }
10457 }
10458
10459 static int deal_root_from_list(struct list_head *list,
10460                                struct btrfs_root *root,
10461                                struct block_info *bits,
10462                                int bits_nr,
10463                                struct cache_tree *pending,
10464                                struct cache_tree *seen,
10465                                struct cache_tree *reada,
10466                                struct cache_tree *nodes,
10467                                struct cache_tree *extent_cache,
10468                                struct cache_tree *chunk_cache,
10469                                struct rb_root *dev_cache,
10470                                struct block_group_tree *block_group_cache,
10471                                struct device_extent_tree *dev_extent_cache)
10472 {
10473         int ret = 0;
10474         u64 last;
10475
10476         while (!list_empty(list)) {
10477                 struct root_item_record *rec;
10478                 struct extent_buffer *buf;
10479                 rec = list_entry(list->next,
10480                                  struct root_item_record, list);
10481                 last = 0;
10482                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10483                 if (!extent_buffer_uptodate(buf)) {
10484                         free_extent_buffer(buf);
10485                         ret = -EIO;
10486                         break;
10487                 }
10488                 ret = add_root_to_pending(buf, extent_cache, pending,
10489                                     seen, nodes, rec->objectid);
10490                 if (ret < 0)
10491                         break;
10492                 /*
10493                  * To rebuild extent tree, we need deal with snapshot
10494                  * one by one, otherwise we deal with node firstly which
10495                  * can maximize readahead.
10496                  */
10497                 while (1) {
10498                         ret = run_next_block(root, bits, bits_nr, &last,
10499                                              pending, seen, reada, nodes,
10500                                              extent_cache, chunk_cache,
10501                                              dev_cache, block_group_cache,
10502                                              dev_extent_cache, rec);
10503                         if (ret != 0)
10504                                 break;
10505                 }
10506                 free_extent_buffer(buf);
10507                 list_del(&rec->list);
10508                 free(rec);
10509                 if (ret < 0)
10510                         break;
10511         }
10512         while (ret >= 0) {
10513                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10514                                      reada, nodes, extent_cache, chunk_cache,
10515                                      dev_cache, block_group_cache,
10516                                      dev_extent_cache, NULL);
10517                 if (ret != 0) {
10518                         if (ret > 0)
10519                                 ret = 0;
10520                         break;
10521                 }
10522         }
10523         return ret;
10524 }
10525
10526 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10527 {
10528         struct rb_root dev_cache;
10529         struct cache_tree chunk_cache;
10530         struct block_group_tree block_group_cache;
10531         struct device_extent_tree dev_extent_cache;
10532         struct cache_tree extent_cache;
10533         struct cache_tree seen;
10534         struct cache_tree pending;
10535         struct cache_tree reada;
10536         struct cache_tree nodes;
10537         struct extent_io_tree excluded_extents;
10538         struct cache_tree corrupt_blocks;
10539         struct btrfs_path path;
10540         struct btrfs_key key;
10541         struct btrfs_key found_key;
10542         int ret, err = 0;
10543         struct block_info *bits;
10544         int bits_nr;
10545         struct extent_buffer *leaf;
10546         int slot;
10547         struct btrfs_root_item ri;
10548         struct list_head dropping_trees;
10549         struct list_head normal_trees;
10550         struct btrfs_root *root1;
10551         struct btrfs_root *root;
10552         u64 objectid;
10553         u8 level;
10554
10555         root = fs_info->fs_root;
10556         dev_cache = RB_ROOT;
10557         cache_tree_init(&chunk_cache);
10558         block_group_tree_init(&block_group_cache);
10559         device_extent_tree_init(&dev_extent_cache);
10560
10561         cache_tree_init(&extent_cache);
10562         cache_tree_init(&seen);
10563         cache_tree_init(&pending);
10564         cache_tree_init(&nodes);
10565         cache_tree_init(&reada);
10566         cache_tree_init(&corrupt_blocks);
10567         extent_io_tree_init(&excluded_extents);
10568         INIT_LIST_HEAD(&dropping_trees);
10569         INIT_LIST_HEAD(&normal_trees);
10570
10571         if (repair) {
10572                 fs_info->excluded_extents = &excluded_extents;
10573                 fs_info->fsck_extent_cache = &extent_cache;
10574                 fs_info->free_extent_hook = free_extent_hook;
10575                 fs_info->corrupt_blocks = &corrupt_blocks;
10576         }
10577
10578         bits_nr = 1024;
10579         bits = malloc(bits_nr * sizeof(struct block_info));
10580         if (!bits) {
10581                 perror("malloc");
10582                 exit(1);
10583         }
10584
10585         if (ctx.progress_enabled) {
10586                 ctx.tp = TASK_EXTENTS;
10587                 task_start(ctx.info);
10588         }
10589
10590 again:
10591         root1 = fs_info->tree_root;
10592         level = btrfs_header_level(root1->node);
10593         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10594                                     root1->node->start, 0, level, 0, NULL);
10595         if (ret < 0)
10596                 goto out;
10597         root1 = fs_info->chunk_root;
10598         level = btrfs_header_level(root1->node);
10599         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10600                                     root1->node->start, 0, level, 0, NULL);
10601         if (ret < 0)
10602                 goto out;
10603         btrfs_init_path(&path);
10604         key.offset = 0;
10605         key.objectid = 0;
10606         key.type = BTRFS_ROOT_ITEM_KEY;
10607         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10608         if (ret < 0)
10609                 goto out;
10610         while(1) {
10611                 leaf = path.nodes[0];
10612                 slot = path.slots[0];
10613                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10614                         ret = btrfs_next_leaf(root, &path);
10615                         if (ret != 0)
10616                                 break;
10617                         leaf = path.nodes[0];
10618                         slot = path.slots[0];
10619                 }
10620                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10621                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10622                         unsigned long offset;
10623                         u64 last_snapshot;
10624
10625                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10626                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10627                         last_snapshot = btrfs_root_last_snapshot(&ri);
10628                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10629                                 level = btrfs_root_level(&ri);
10630                                 ret = add_root_item_to_list(&normal_trees,
10631                                                 found_key.objectid,
10632                                                 btrfs_root_bytenr(&ri),
10633                                                 last_snapshot, level,
10634                                                 0, NULL);
10635                                 if (ret < 0)
10636                                         goto out;
10637                         } else {
10638                                 level = btrfs_root_level(&ri);
10639                                 objectid = found_key.objectid;
10640                                 btrfs_disk_key_to_cpu(&found_key,
10641                                                       &ri.drop_progress);
10642                                 ret = add_root_item_to_list(&dropping_trees,
10643                                                 objectid,
10644                                                 btrfs_root_bytenr(&ri),
10645                                                 last_snapshot, level,
10646                                                 ri.drop_level, &found_key);
10647                                 if (ret < 0)
10648                                         goto out;
10649                         }
10650                 }
10651                 path.slots[0]++;
10652         }
10653         btrfs_release_path(&path);
10654
10655         /*
10656          * check_block can return -EAGAIN if it fixes something, please keep
10657          * this in mind when dealing with return values from these functions, if
10658          * we get -EAGAIN we want to fall through and restart the loop.
10659          */
10660         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10661                                   &seen, &reada, &nodes, &extent_cache,
10662                                   &chunk_cache, &dev_cache, &block_group_cache,
10663                                   &dev_extent_cache);
10664         if (ret < 0) {
10665                 if (ret == -EAGAIN)
10666                         goto loop;
10667                 goto out;
10668         }
10669         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10670                                   &pending, &seen, &reada, &nodes,
10671                                   &extent_cache, &chunk_cache, &dev_cache,
10672                                   &block_group_cache, &dev_extent_cache);
10673         if (ret < 0) {
10674                 if (ret == -EAGAIN)
10675                         goto loop;
10676                 goto out;
10677         }
10678
10679         ret = check_chunks(&chunk_cache, &block_group_cache,
10680                            &dev_extent_cache, NULL, NULL, NULL, 0);
10681         if (ret) {
10682                 if (ret == -EAGAIN)
10683                         goto loop;
10684                 err = ret;
10685         }
10686
10687         ret = check_extent_refs(root, &extent_cache);
10688         if (ret < 0) {
10689                 if (ret == -EAGAIN)
10690                         goto loop;
10691                 goto out;
10692         }
10693
10694         ret = check_devices(&dev_cache, &dev_extent_cache);
10695         if (ret && err)
10696                 ret = err;
10697
10698 out:
10699         task_stop(ctx.info);
10700         if (repair) {
10701                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10702                 extent_io_tree_cleanup(&excluded_extents);
10703                 fs_info->fsck_extent_cache = NULL;
10704                 fs_info->free_extent_hook = NULL;
10705                 fs_info->corrupt_blocks = NULL;
10706                 fs_info->excluded_extents = NULL;
10707         }
10708         free(bits);
10709         free_chunk_cache_tree(&chunk_cache);
10710         free_device_cache_tree(&dev_cache);
10711         free_block_group_tree(&block_group_cache);
10712         free_device_extent_tree(&dev_extent_cache);
10713         free_extent_cache_tree(&seen);
10714         free_extent_cache_tree(&pending);
10715         free_extent_cache_tree(&reada);
10716         free_extent_cache_tree(&nodes);
10717         free_root_item_list(&normal_trees);
10718         free_root_item_list(&dropping_trees);
10719         return ret;
10720 loop:
10721         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10722         free_extent_cache_tree(&seen);
10723         free_extent_cache_tree(&pending);
10724         free_extent_cache_tree(&reada);
10725         free_extent_cache_tree(&nodes);
10726         free_chunk_cache_tree(&chunk_cache);
10727         free_block_group_tree(&block_group_cache);
10728         free_device_cache_tree(&dev_cache);
10729         free_device_extent_tree(&dev_extent_cache);
10730         free_extent_record_cache(&extent_cache);
10731         free_root_item_list(&normal_trees);
10732         free_root_item_list(&dropping_trees);
10733         extent_io_tree_cleanup(&excluded_extents);
10734         goto again;
10735 }
10736
10737 /*
10738  * Check backrefs of a tree block given by @bytenr or @eb.
10739  *
10740  * @root:       the root containing the @bytenr or @eb
10741  * @eb:         tree block extent buffer, can be NULL
10742  * @bytenr:     bytenr of the tree block to search
10743  * @level:      tree level of the tree block
10744  * @owner:      owner of the tree block
10745  *
10746  * Return >0 for any error found and output error message
10747  * Return 0 for no error found
10748  */
10749 static int check_tree_block_ref(struct btrfs_root *root,
10750                                 struct extent_buffer *eb, u64 bytenr,
10751                                 int level, u64 owner)
10752 {
10753         struct btrfs_key key;
10754         struct btrfs_root *extent_root = root->fs_info->extent_root;
10755         struct btrfs_path path;
10756         struct btrfs_extent_item *ei;
10757         struct btrfs_extent_inline_ref *iref;
10758         struct extent_buffer *leaf;
10759         unsigned long end;
10760         unsigned long ptr;
10761         int slot;
10762         int skinny_level;
10763         int type;
10764         u32 nodesize = root->fs_info->nodesize;
10765         u32 item_size;
10766         u64 offset;
10767         int tree_reloc_root = 0;
10768         int found_ref = 0;
10769         int err = 0;
10770         int ret;
10771
10772         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10773             btrfs_header_bytenr(root->node) == bytenr)
10774                 tree_reloc_root = 1;
10775
10776         btrfs_init_path(&path);
10777         key.objectid = bytenr;
10778         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10779                 key.type = BTRFS_METADATA_ITEM_KEY;
10780         else
10781                 key.type = BTRFS_EXTENT_ITEM_KEY;
10782         key.offset = (u64)-1;
10783
10784         /* Search for the backref in extent tree */
10785         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10786         if (ret < 0) {
10787                 err |= BACKREF_MISSING;
10788                 goto out;
10789         }
10790         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10791         if (ret) {
10792                 err |= BACKREF_MISSING;
10793                 goto out;
10794         }
10795
10796         leaf = path.nodes[0];
10797         slot = path.slots[0];
10798         btrfs_item_key_to_cpu(leaf, &key, slot);
10799
10800         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10801
10802         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10803                 skinny_level = (int)key.offset;
10804                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10805         } else {
10806                 struct btrfs_tree_block_info *info;
10807
10808                 info = (struct btrfs_tree_block_info *)(ei + 1);
10809                 skinny_level = btrfs_tree_block_level(leaf, info);
10810                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10811         }
10812
10813         if (eb) {
10814                 u64 header_gen;
10815                 u64 extent_gen;
10816
10817                 if (!(btrfs_extent_flags(leaf, ei) &
10818                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10819                         error(
10820                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10821                                 key.objectid, nodesize,
10822                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10823                         err = BACKREF_MISMATCH;
10824                 }
10825                 header_gen = btrfs_header_generation(eb);
10826                 extent_gen = btrfs_extent_generation(leaf, ei);
10827                 if (header_gen != extent_gen) {
10828                         error(
10829         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10830                                 key.objectid, nodesize, header_gen,
10831                                 extent_gen);
10832                         err = BACKREF_MISMATCH;
10833                 }
10834                 if (level != skinny_level) {
10835                         error(
10836                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10837                                 key.objectid, nodesize, level, skinny_level);
10838                         err = BACKREF_MISMATCH;
10839                 }
10840                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10841                         error(
10842                         "extent[%llu %u] is referred by other roots than %llu",
10843                                 key.objectid, nodesize, root->objectid);
10844                         err = BACKREF_MISMATCH;
10845                 }
10846         }
10847
10848         /*
10849          * Iterate the extent/metadata item to find the exact backref
10850          */
10851         item_size = btrfs_item_size_nr(leaf, slot);
10852         ptr = (unsigned long)iref;
10853         end = (unsigned long)ei + item_size;
10854         while (ptr < end) {
10855                 iref = (struct btrfs_extent_inline_ref *)ptr;
10856                 type = btrfs_extent_inline_ref_type(leaf, iref);
10857                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10858
10859                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10860                         (offset == root->objectid || offset == owner)) {
10861                         found_ref = 1;
10862                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10863                         /*
10864                          * Backref of tree reloc root points to itself, no need
10865                          * to check backref any more.
10866                          */
10867                         if (tree_reloc_root)
10868                                 found_ref = 1;
10869                         else
10870                         /* Check if the backref points to valid referencer */
10871                                 found_ref = !check_tree_block_ref(root, NULL,
10872                                                 offset, level + 1, owner);
10873                 }
10874
10875                 if (found_ref)
10876                         break;
10877                 ptr += btrfs_extent_inline_ref_size(type);
10878         }
10879
10880         /*
10881          * Inlined extent item doesn't have what we need, check
10882          * TREE_BLOCK_REF_KEY
10883          */
10884         if (!found_ref) {
10885                 btrfs_release_path(&path);
10886                 key.objectid = bytenr;
10887                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10888                 key.offset = root->objectid;
10889
10890                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10891                 if (!ret)
10892                         found_ref = 1;
10893         }
10894         if (!found_ref)
10895                 err |= BACKREF_MISSING;
10896 out:
10897         btrfs_release_path(&path);
10898         if (eb && (err & BACKREF_MISSING))
10899                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10900                         bytenr, nodesize, owner, level);
10901         return err;
10902 }
10903
10904 /*
10905  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10906  *
10907  * Return >0 any error found and output error message
10908  * Return 0 for no error found
10909  */
10910 static int check_extent_data_item(struct btrfs_root *root,
10911                                   struct extent_buffer *eb, int slot)
10912 {
10913         struct btrfs_file_extent_item *fi;
10914         struct btrfs_path path;
10915         struct btrfs_root *extent_root = root->fs_info->extent_root;
10916         struct btrfs_key fi_key;
10917         struct btrfs_key dbref_key;
10918         struct extent_buffer *leaf;
10919         struct btrfs_extent_item *ei;
10920         struct btrfs_extent_inline_ref *iref;
10921         struct btrfs_extent_data_ref *dref;
10922         u64 owner;
10923         u64 disk_bytenr;
10924         u64 disk_num_bytes;
10925         u64 extent_num_bytes;
10926         u64 extent_flags;
10927         u32 item_size;
10928         unsigned long end;
10929         unsigned long ptr;
10930         int type;
10931         u64 ref_root;
10932         int found_dbackref = 0;
10933         int err = 0;
10934         int ret;
10935
10936         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10937         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10938
10939         /* Nothing to check for hole and inline data extents */
10940         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10941             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10942                 return 0;
10943
10944         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10945         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10946         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10947
10948         /* Check unaligned disk_num_bytes and num_bytes */
10949         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10950                 error(
10951 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10952                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10953                         root->fs_info->sectorsize);
10954                 err |= BYTES_UNALIGNED;
10955         } else {
10956                 data_bytes_allocated += disk_num_bytes;
10957         }
10958         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10959                 error(
10960 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10961                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10962                         root->fs_info->sectorsize);
10963                 err |= BYTES_UNALIGNED;
10964         } else {
10965                 data_bytes_referenced += extent_num_bytes;
10966         }
10967         owner = btrfs_header_owner(eb);
10968
10969         /* Check the extent item of the file extent in extent tree */
10970         btrfs_init_path(&path);
10971         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10972         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10973         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10974
10975         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10976         if (ret)
10977                 goto out;
10978
10979         leaf = path.nodes[0];
10980         slot = path.slots[0];
10981         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10982
10983         extent_flags = btrfs_extent_flags(leaf, ei);
10984
10985         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10986                 error(
10987                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10988                     disk_bytenr, disk_num_bytes,
10989                     BTRFS_EXTENT_FLAG_DATA);
10990                 err |= BACKREF_MISMATCH;
10991         }
10992
10993         /* Check data backref inside that extent item */
10994         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10995         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10996         ptr = (unsigned long)iref;
10997         end = (unsigned long)ei + item_size;
10998         while (ptr < end) {
10999                 iref = (struct btrfs_extent_inline_ref *)ptr;
11000                 type = btrfs_extent_inline_ref_type(leaf, iref);
11001                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11002
11003                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11004                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11005                         if (ref_root == owner || ref_root == root->objectid)
11006                                 found_dbackref = 1;
11007                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11008                         found_dbackref = !check_tree_block_ref(root, NULL,
11009                                 btrfs_extent_inline_ref_offset(leaf, iref),
11010                                 0, owner);
11011                 }
11012
11013                 if (found_dbackref)
11014                         break;
11015                 ptr += btrfs_extent_inline_ref_size(type);
11016         }
11017
11018         if (!found_dbackref) {
11019                 btrfs_release_path(&path);
11020
11021                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11022                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11023                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11024                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11025                                 fi_key.objectid, fi_key.offset);
11026
11027                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11028                                         &dbref_key, &path, 0, 0);
11029                 if (!ret) {
11030                         found_dbackref = 1;
11031                         goto out;
11032                 }
11033
11034                 btrfs_release_path(&path);
11035
11036                 /*
11037                  * Neither inlined nor EXTENT_DATA_REF found, try
11038                  * SHARED_DATA_REF as last chance.
11039                  */
11040                 dbref_key.objectid = disk_bytenr;
11041                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11042                 dbref_key.offset = eb->start;
11043
11044                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11045                                         &dbref_key, &path, 0, 0);
11046                 if (!ret) {
11047                         found_dbackref = 1;
11048                         goto out;
11049                 }
11050         }
11051
11052 out:
11053         if (!found_dbackref)
11054                 err |= BACKREF_MISSING;
11055         btrfs_release_path(&path);
11056         if (err & BACKREF_MISSING) {
11057                 error("data extent[%llu %llu] backref lost",
11058                       disk_bytenr, disk_num_bytes);
11059         }
11060         return err;
11061 }
11062
11063 /*
11064  * Get real tree block level for the case like shared block
11065  * Return >= 0 as tree level
11066  * Return <0 for error
11067  */
11068 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11069 {
11070         struct extent_buffer *eb;
11071         struct btrfs_path path;
11072         struct btrfs_key key;
11073         struct btrfs_extent_item *ei;
11074         u64 flags;
11075         u64 transid;
11076         u8 backref_level;
11077         u8 header_level;
11078         int ret;
11079
11080         /* Search extent tree for extent generation and level */
11081         key.objectid = bytenr;
11082         key.type = BTRFS_METADATA_ITEM_KEY;
11083         key.offset = (u64)-1;
11084
11085         btrfs_init_path(&path);
11086         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11087         if (ret < 0)
11088                 goto release_out;
11089         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11090         if (ret < 0)
11091                 goto release_out;
11092         if (ret > 0) {
11093                 ret = -ENOENT;
11094                 goto release_out;
11095         }
11096
11097         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11098         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11099                             struct btrfs_extent_item);
11100         flags = btrfs_extent_flags(path.nodes[0], ei);
11101         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11102                 ret = -ENOENT;
11103                 goto release_out;
11104         }
11105
11106         /* Get transid for later read_tree_block() check */
11107         transid = btrfs_extent_generation(path.nodes[0], ei);
11108
11109         /* Get backref level as one source */
11110         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11111                 backref_level = key.offset;
11112         } else {
11113                 struct btrfs_tree_block_info *info;
11114
11115                 info = (struct btrfs_tree_block_info *)(ei + 1);
11116                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11117         }
11118         btrfs_release_path(&path);
11119
11120         /* Get level from tree block as an alternative source */
11121         eb = read_tree_block(fs_info, bytenr, transid);
11122         if (!extent_buffer_uptodate(eb)) {
11123                 free_extent_buffer(eb);
11124                 return -EIO;
11125         }
11126         header_level = btrfs_header_level(eb);
11127         free_extent_buffer(eb);
11128
11129         if (header_level != backref_level)
11130                 return -EIO;
11131         return header_level;
11132
11133 release_out:
11134         btrfs_release_path(&path);
11135         return ret;
11136 }
11137
11138 /*
11139  * Check if a tree block backref is valid (points to a valid tree block)
11140  * if level == -1, level will be resolved
11141  * Return >0 for any error found and print error message
11142  */
11143 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11144                                     u64 bytenr, int level)
11145 {
11146         struct btrfs_root *root;
11147         struct btrfs_key key;
11148         struct btrfs_path path;
11149         struct extent_buffer *eb;
11150         struct extent_buffer *node;
11151         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11152         int err = 0;
11153         int ret;
11154
11155         /* Query level for level == -1 special case */
11156         if (level == -1)
11157                 level = query_tree_block_level(fs_info, bytenr);
11158         if (level < 0) {
11159                 err |= REFERENCER_MISSING;
11160                 goto out;
11161         }
11162
11163         key.objectid = root_id;
11164         key.type = BTRFS_ROOT_ITEM_KEY;
11165         key.offset = (u64)-1;
11166
11167         root = btrfs_read_fs_root(fs_info, &key);
11168         if (IS_ERR(root)) {
11169                 err |= REFERENCER_MISSING;
11170                 goto out;
11171         }
11172
11173         /* Read out the tree block to get item/node key */
11174         eb = read_tree_block(fs_info, bytenr, 0);
11175         if (!extent_buffer_uptodate(eb)) {
11176                 err |= REFERENCER_MISSING;
11177                 free_extent_buffer(eb);
11178                 goto out;
11179         }
11180
11181         /* Empty tree, no need to check key */
11182         if (!btrfs_header_nritems(eb) && !level) {
11183                 free_extent_buffer(eb);
11184                 goto out;
11185         }
11186
11187         if (level)
11188                 btrfs_node_key_to_cpu(eb, &key, 0);
11189         else
11190                 btrfs_item_key_to_cpu(eb, &key, 0);
11191
11192         free_extent_buffer(eb);
11193
11194         btrfs_init_path(&path);
11195         path.lowest_level = level;
11196         /* Search with the first key, to ensure we can reach it */
11197         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11198         if (ret < 0) {
11199                 err |= REFERENCER_MISSING;
11200                 goto release_out;
11201         }
11202
11203         node = path.nodes[level];
11204         if (btrfs_header_bytenr(node) != bytenr) {
11205                 error(
11206         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11207                         bytenr, nodesize, bytenr,
11208                         btrfs_header_bytenr(node));
11209                 err |= REFERENCER_MISMATCH;
11210         }
11211         if (btrfs_header_level(node) != level) {
11212                 error(
11213         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11214                         bytenr, nodesize, level,
11215                         btrfs_header_level(node));
11216                 err |= REFERENCER_MISMATCH;
11217         }
11218
11219 release_out:
11220         btrfs_release_path(&path);
11221 out:
11222         if (err & REFERENCER_MISSING) {
11223                 if (level < 0)
11224                         error("extent [%llu %d] lost referencer (owner: %llu)",
11225                                 bytenr, nodesize, root_id);
11226                 else
11227                         error(
11228                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11229                                 bytenr, nodesize, root_id, level);
11230         }
11231
11232         return err;
11233 }
11234
11235 /*
11236  * Check if tree block @eb is tree reloc root.
11237  * Return 0 if it's not or any problem happens
11238  * Return 1 if it's a tree reloc root
11239  */
11240 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11241                                  struct extent_buffer *eb)
11242 {
11243         struct btrfs_root *tree_reloc_root;
11244         struct btrfs_key key;
11245         u64 bytenr = btrfs_header_bytenr(eb);
11246         u64 owner = btrfs_header_owner(eb);
11247         int ret = 0;
11248
11249         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11250         key.offset = owner;
11251         key.type = BTRFS_ROOT_ITEM_KEY;
11252
11253         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11254         if (IS_ERR(tree_reloc_root))
11255                 return 0;
11256
11257         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11258                 ret = 1;
11259         btrfs_free_fs_root(tree_reloc_root);
11260         return ret;
11261 }
11262
11263 /*
11264  * Check referencer for shared block backref
11265  * If level == -1, this function will resolve the level.
11266  */
11267 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11268                                      u64 parent, u64 bytenr, int level)
11269 {
11270         struct extent_buffer *eb;
11271         u32 nr;
11272         int found_parent = 0;
11273         int i;
11274
11275         eb = read_tree_block(fs_info, parent, 0);
11276         if (!extent_buffer_uptodate(eb))
11277                 goto out;
11278
11279         if (level == -1)
11280                 level = query_tree_block_level(fs_info, bytenr);
11281         if (level < 0)
11282                 goto out;
11283
11284         /* It's possible it's a tree reloc root */
11285         if (parent == bytenr) {
11286                 if (is_tree_reloc_root(fs_info, eb))
11287                         found_parent = 1;
11288                 goto out;
11289         }
11290
11291         if (level + 1 != btrfs_header_level(eb))
11292                 goto out;
11293
11294         nr = btrfs_header_nritems(eb);
11295         for (i = 0; i < nr; i++) {
11296                 if (bytenr == btrfs_node_blockptr(eb, i)) {
11297                         found_parent = 1;
11298                         break;
11299                 }
11300         }
11301 out:
11302         free_extent_buffer(eb);
11303         if (!found_parent) {
11304                 error(
11305         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11306                         bytenr, fs_info->nodesize, parent, level);
11307                 return REFERENCER_MISSING;
11308         }
11309         return 0;
11310 }
11311
11312 /*
11313  * Check referencer for normal (inlined) data ref
11314  * If len == 0, it will be resolved by searching in extent tree
11315  */
11316 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11317                                      u64 root_id, u64 objectid, u64 offset,
11318                                      u64 bytenr, u64 len, u32 count)
11319 {
11320         struct btrfs_root *root;
11321         struct btrfs_root *extent_root = fs_info->extent_root;
11322         struct btrfs_key key;
11323         struct btrfs_path path;
11324         struct extent_buffer *leaf;
11325         struct btrfs_file_extent_item *fi;
11326         u32 found_count = 0;
11327         int slot;
11328         int ret = 0;
11329
11330         if (!len) {
11331                 key.objectid = bytenr;
11332                 key.type = BTRFS_EXTENT_ITEM_KEY;
11333                 key.offset = (u64)-1;
11334
11335                 btrfs_init_path(&path);
11336                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11337                 if (ret < 0)
11338                         goto out;
11339                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11340                 if (ret)
11341                         goto out;
11342                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11343                 if (key.objectid != bytenr ||
11344                     key.type != BTRFS_EXTENT_ITEM_KEY)
11345                         goto out;
11346                 len = key.offset;
11347                 btrfs_release_path(&path);
11348         }
11349         key.objectid = root_id;
11350         key.type = BTRFS_ROOT_ITEM_KEY;
11351         key.offset = (u64)-1;
11352         btrfs_init_path(&path);
11353
11354         root = btrfs_read_fs_root(fs_info, &key);
11355         if (IS_ERR(root))
11356                 goto out;
11357
11358         key.objectid = objectid;
11359         key.type = BTRFS_EXTENT_DATA_KEY;
11360         /*
11361          * It can be nasty as data backref offset is
11362          * file offset - file extent offset, which is smaller or
11363          * equal to original backref offset.  The only special case is
11364          * overflow.  So we need to special check and do further search.
11365          */
11366         key.offset = offset & (1ULL << 63) ? 0 : offset;
11367
11368         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11369         if (ret < 0)
11370                 goto out;
11371
11372         /*
11373          * Search afterwards to get correct one
11374          * NOTE: As we must do a comprehensive check on the data backref to
11375          * make sure the dref count also matches, we must iterate all file
11376          * extents for that inode.
11377          */
11378         while (1) {
11379                 leaf = path.nodes[0];
11380                 slot = path.slots[0];
11381
11382                 if (slot >= btrfs_header_nritems(leaf))
11383                         goto next;
11384                 btrfs_item_key_to_cpu(leaf, &key, slot);
11385                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11386                         break;
11387                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11388                 /*
11389                  * Except normal disk bytenr and disk num bytes, we still
11390                  * need to do extra check on dbackref offset as
11391                  * dbackref offset = file_offset - file_extent_offset
11392                  */
11393                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11394                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11395                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11396                     offset)
11397                         found_count++;
11398
11399 next:
11400                 ret = btrfs_next_item(root, &path);
11401                 if (ret)
11402                         break;
11403         }
11404 out:
11405         btrfs_release_path(&path);
11406         if (found_count != count) {
11407                 error(
11408 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11409                         bytenr, len, root_id, objectid, offset, count, found_count);
11410                 return REFERENCER_MISSING;
11411         }
11412         return 0;
11413 }
11414
11415 /*
11416  * Check if the referencer of a shared data backref exists
11417  */
11418 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11419                                      u64 parent, u64 bytenr)
11420 {
11421         struct extent_buffer *eb;
11422         struct btrfs_key key;
11423         struct btrfs_file_extent_item *fi;
11424         u32 nr;
11425         int found_parent = 0;
11426         int i;
11427
11428         eb = read_tree_block(fs_info, parent, 0);
11429         if (!extent_buffer_uptodate(eb))
11430                 goto out;
11431
11432         nr = btrfs_header_nritems(eb);
11433         for (i = 0; i < nr; i++) {
11434                 btrfs_item_key_to_cpu(eb, &key, i);
11435                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11436                         continue;
11437
11438                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11439                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11440                         continue;
11441
11442                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11443                         found_parent = 1;
11444                         break;
11445                 }
11446         }
11447
11448 out:
11449         free_extent_buffer(eb);
11450         if (!found_parent) {
11451                 error("shared extent %llu referencer lost (parent: %llu)",
11452                         bytenr, parent);
11453                 return REFERENCER_MISSING;
11454         }
11455         return 0;
11456 }
11457
11458 /*
11459  * This function will check a given extent item, including its backref and
11460  * itself (like crossing stripe boundary and type)
11461  *
11462  * Since we don't use extent_record anymore, introduce new error bit
11463  */
11464 static int check_extent_item(struct btrfs_fs_info *fs_info,
11465                              struct extent_buffer *eb, int slot)
11466 {
11467         struct btrfs_extent_item *ei;
11468         struct btrfs_extent_inline_ref *iref;
11469         struct btrfs_extent_data_ref *dref;
11470         unsigned long end;
11471         unsigned long ptr;
11472         int type;
11473         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11474         u32 item_size = btrfs_item_size_nr(eb, slot);
11475         u64 flags;
11476         u64 offset;
11477         int metadata = 0;
11478         int level;
11479         struct btrfs_key key;
11480         int ret;
11481         int err = 0;
11482
11483         btrfs_item_key_to_cpu(eb, &key, slot);
11484         if (key.type == BTRFS_EXTENT_ITEM_KEY)
11485                 bytes_used += key.offset;
11486         else
11487                 bytes_used += nodesize;
11488
11489         if (item_size < sizeof(*ei)) {
11490                 /*
11491                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11492                  * old thing when on disk format is still un-determined.
11493                  * No need to care about it anymore
11494                  */
11495                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11496                 return -ENOTTY;
11497         }
11498
11499         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11500         flags = btrfs_extent_flags(eb, ei);
11501
11502         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11503                 metadata = 1;
11504         if (metadata && check_crossing_stripes(global_info, key.objectid,
11505                                                eb->len)) {
11506                 error("bad metadata [%llu, %llu) crossing stripe boundary",
11507                       key.objectid, key.objectid + nodesize);
11508                 err |= CROSSING_STRIPE_BOUNDARY;
11509         }
11510
11511         ptr = (unsigned long)(ei + 1);
11512
11513         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11514                 /* Old EXTENT_ITEM metadata */
11515                 struct btrfs_tree_block_info *info;
11516
11517                 info = (struct btrfs_tree_block_info *)ptr;
11518                 level = btrfs_tree_block_level(eb, info);
11519                 ptr += sizeof(struct btrfs_tree_block_info);
11520         } else {
11521                 /* New METADATA_ITEM */
11522                 level = key.offset;
11523         }
11524         end = (unsigned long)ei + item_size;
11525
11526 next:
11527         /* Reached extent item end normally */
11528         if (ptr == end)
11529                 goto out;
11530
11531         /* Beyond extent item end, wrong item size */
11532         if (ptr > end) {
11533                 err |= ITEM_SIZE_MISMATCH;
11534                 error("extent item at bytenr %llu slot %d has wrong size",
11535                         eb->start, slot);
11536                 goto out;
11537         }
11538
11539         /* Now check every backref in this extent item */
11540         iref = (struct btrfs_extent_inline_ref *)ptr;
11541         type = btrfs_extent_inline_ref_type(eb, iref);
11542         offset = btrfs_extent_inline_ref_offset(eb, iref);
11543         switch (type) {
11544         case BTRFS_TREE_BLOCK_REF_KEY:
11545                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11546                                                level);
11547                 err |= ret;
11548                 break;
11549         case BTRFS_SHARED_BLOCK_REF_KEY:
11550                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11551                                                  level);
11552                 err |= ret;
11553                 break;
11554         case BTRFS_EXTENT_DATA_REF_KEY:
11555                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11556                 ret = check_extent_data_backref(fs_info,
11557                                 btrfs_extent_data_ref_root(eb, dref),
11558                                 btrfs_extent_data_ref_objectid(eb, dref),
11559                                 btrfs_extent_data_ref_offset(eb, dref),
11560                                 key.objectid, key.offset,
11561                                 btrfs_extent_data_ref_count(eb, dref));
11562                 err |= ret;
11563                 break;
11564         case BTRFS_SHARED_DATA_REF_KEY:
11565                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11566                 err |= ret;
11567                 break;
11568         default:
11569                 error("extent[%llu %d %llu] has unknown ref type: %d",
11570                         key.objectid, key.type, key.offset, type);
11571                 err |= UNKNOWN_TYPE;
11572                 goto out;
11573         }
11574
11575         ptr += btrfs_extent_inline_ref_size(type);
11576         goto next;
11577
11578 out:
11579         return err;
11580 }
11581
11582 /*
11583  * Check if a dev extent item is referred correctly by its chunk
11584  */
11585 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11586                                  struct extent_buffer *eb, int slot)
11587 {
11588         struct btrfs_root *chunk_root = fs_info->chunk_root;
11589         struct btrfs_dev_extent *ptr;
11590         struct btrfs_path path;
11591         struct btrfs_key chunk_key;
11592         struct btrfs_key devext_key;
11593         struct btrfs_chunk *chunk;
11594         struct extent_buffer *l;
11595         int num_stripes;
11596         u64 length;
11597         int i;
11598         int found_chunk = 0;
11599         int ret;
11600
11601         btrfs_item_key_to_cpu(eb, &devext_key, slot);
11602         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11603         length = btrfs_dev_extent_length(eb, ptr);
11604
11605         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11606         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11607         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11608
11609         btrfs_init_path(&path);
11610         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11611         if (ret)
11612                 goto out;
11613
11614         l = path.nodes[0];
11615         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11616         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11617                                       chunk_key.offset);
11618         if (ret < 0)
11619                 goto out;
11620
11621         if (btrfs_stripe_length(fs_info, l, chunk) != length)
11622                 goto out;
11623
11624         num_stripes = btrfs_chunk_num_stripes(l, chunk);
11625         for (i = 0; i < num_stripes; i++) {
11626                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11627                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11628
11629                 if (devid == devext_key.objectid &&
11630                     offset == devext_key.offset) {
11631                         found_chunk = 1;
11632                         break;
11633                 }
11634         }
11635 out:
11636         btrfs_release_path(&path);
11637         if (!found_chunk) {
11638                 error(
11639                 "device extent[%llu, %llu, %llu] did not find the related chunk",
11640                         devext_key.objectid, devext_key.offset, length);
11641                 return REFERENCER_MISSING;
11642         }
11643         return 0;
11644 }
11645
11646 /*
11647  * Check if the used space is correct with the dev item
11648  */
11649 static int check_dev_item(struct btrfs_fs_info *fs_info,
11650                           struct extent_buffer *eb, int slot)
11651 {
11652         struct btrfs_root *dev_root = fs_info->dev_root;
11653         struct btrfs_dev_item *dev_item;
11654         struct btrfs_path path;
11655         struct btrfs_key key;
11656         struct btrfs_dev_extent *ptr;
11657         u64 dev_id;
11658         u64 used;
11659         u64 total = 0;
11660         int ret;
11661
11662         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11663         dev_id = btrfs_device_id(eb, dev_item);
11664         used = btrfs_device_bytes_used(eb, dev_item);
11665
11666         key.objectid = dev_id;
11667         key.type = BTRFS_DEV_EXTENT_KEY;
11668         key.offset = 0;
11669
11670         btrfs_init_path(&path);
11671         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11672         if (ret < 0) {
11673                 btrfs_item_key_to_cpu(eb, &key, slot);
11674                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11675                         key.objectid, key.type, key.offset);
11676                 btrfs_release_path(&path);
11677                 return REFERENCER_MISSING;
11678         }
11679
11680         /* Iterate dev_extents to calculate the used space of a device */
11681         while (1) {
11682                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11683                         goto next;
11684
11685                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11686                 if (key.objectid > dev_id)
11687                         break;
11688                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11689                         goto next;
11690
11691                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11692                                      struct btrfs_dev_extent);
11693                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11694 next:
11695                 ret = btrfs_next_item(dev_root, &path);
11696                 if (ret)
11697                         break;
11698         }
11699         btrfs_release_path(&path);
11700
11701         if (used != total) {
11702                 btrfs_item_key_to_cpu(eb, &key, slot);
11703                 error(
11704 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11705                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11706                         BTRFS_DEV_EXTENT_KEY, dev_id);
11707                 return ACCOUNTING_MISMATCH;
11708         }
11709         return 0;
11710 }
11711
11712 /*
11713  * Check a block group item with its referener (chunk) and its used space
11714  * with extent/metadata item
11715  */
11716 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11717                                   struct extent_buffer *eb, int slot)
11718 {
11719         struct btrfs_root *extent_root = fs_info->extent_root;
11720         struct btrfs_root *chunk_root = fs_info->chunk_root;
11721         struct btrfs_block_group_item *bi;
11722         struct btrfs_block_group_item bg_item;
11723         struct btrfs_path path;
11724         struct btrfs_key bg_key;
11725         struct btrfs_key chunk_key;
11726         struct btrfs_key extent_key;
11727         struct btrfs_chunk *chunk;
11728         struct extent_buffer *leaf;
11729         struct btrfs_extent_item *ei;
11730         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11731         u64 flags;
11732         u64 bg_flags;
11733         u64 used;
11734         u64 total = 0;
11735         int ret;
11736         int err = 0;
11737
11738         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11739         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11740         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11741         used = btrfs_block_group_used(&bg_item);
11742         bg_flags = btrfs_block_group_flags(&bg_item);
11743
11744         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11745         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11746         chunk_key.offset = bg_key.objectid;
11747
11748         btrfs_init_path(&path);
11749         /* Search for the referencer chunk */
11750         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11751         if (ret) {
11752                 error(
11753                 "block group[%llu %llu] did not find the related chunk item",
11754                         bg_key.objectid, bg_key.offset);
11755                 err |= REFERENCER_MISSING;
11756         } else {
11757                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11758                                         struct btrfs_chunk);
11759                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11760                                                 bg_key.offset) {
11761                         error(
11762         "block group[%llu %llu] related chunk item length does not match",
11763                                 bg_key.objectid, bg_key.offset);
11764                         err |= REFERENCER_MISMATCH;
11765                 }
11766         }
11767         btrfs_release_path(&path);
11768
11769         /* Search from the block group bytenr */
11770         extent_key.objectid = bg_key.objectid;
11771         extent_key.type = 0;
11772         extent_key.offset = 0;
11773
11774         btrfs_init_path(&path);
11775         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11776         if (ret < 0)
11777                 goto out;
11778
11779         /* Iterate extent tree to account used space */
11780         while (1) {
11781                 leaf = path.nodes[0];
11782
11783                 /* Search slot can point to the last item beyond leaf nritems */
11784                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11785                         goto next;
11786
11787                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11788                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11789                         break;
11790
11791                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11792                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11793                         goto next;
11794                 if (extent_key.objectid < bg_key.objectid)
11795                         goto next;
11796
11797                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11798                         total += nodesize;
11799                 else
11800                         total += extent_key.offset;
11801
11802                 ei = btrfs_item_ptr(leaf, path.slots[0],
11803                                     struct btrfs_extent_item);
11804                 flags = btrfs_extent_flags(leaf, ei);
11805                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11806                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11807                                 error(
11808                         "bad extent[%llu, %llu) type mismatch with chunk",
11809                                         extent_key.objectid,
11810                                         extent_key.objectid + extent_key.offset);
11811                                 err |= CHUNK_TYPE_MISMATCH;
11812                         }
11813                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11814                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11815                                     BTRFS_BLOCK_GROUP_METADATA))) {
11816                                 error(
11817                         "bad extent[%llu, %llu) type mismatch with chunk",
11818                                         extent_key.objectid,
11819                                         extent_key.objectid + nodesize);
11820                                 err |= CHUNK_TYPE_MISMATCH;
11821                         }
11822                 }
11823 next:
11824                 ret = btrfs_next_item(extent_root, &path);
11825                 if (ret)
11826                         break;
11827         }
11828
11829 out:
11830         btrfs_release_path(&path);
11831
11832         if (total != used) {
11833                 error(
11834                 "block group[%llu %llu] used %llu but extent items used %llu",
11835                         bg_key.objectid, bg_key.offset, used, total);
11836                 err |= ACCOUNTING_MISMATCH;
11837         }
11838         return err;
11839 }
11840
11841 /*
11842  * Check a chunk item.
11843  * Including checking all referred dev_extents and block group
11844  */
11845 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11846                             struct extent_buffer *eb, int slot)
11847 {
11848         struct btrfs_root *extent_root = fs_info->extent_root;
11849         struct btrfs_root *dev_root = fs_info->dev_root;
11850         struct btrfs_path path;
11851         struct btrfs_key chunk_key;
11852         struct btrfs_key bg_key;
11853         struct btrfs_key devext_key;
11854         struct btrfs_chunk *chunk;
11855         struct extent_buffer *leaf;
11856         struct btrfs_block_group_item *bi;
11857         struct btrfs_block_group_item bg_item;
11858         struct btrfs_dev_extent *ptr;
11859         u64 length;
11860         u64 chunk_end;
11861         u64 stripe_len;
11862         u64 type;
11863         int num_stripes;
11864         u64 offset;
11865         u64 objectid;
11866         int i;
11867         int ret;
11868         int err = 0;
11869
11870         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11871         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11872         length = btrfs_chunk_length(eb, chunk);
11873         chunk_end = chunk_key.offset + length;
11874         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11875                                       chunk_key.offset);
11876         if (ret < 0) {
11877                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11878                         chunk_end);
11879                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11880                 goto out;
11881         }
11882         type = btrfs_chunk_type(eb, chunk);
11883
11884         bg_key.objectid = chunk_key.offset;
11885         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11886         bg_key.offset = length;
11887
11888         btrfs_init_path(&path);
11889         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11890         if (ret) {
11891                 error(
11892                 "chunk[%llu %llu) did not find the related block group item",
11893                         chunk_key.offset, chunk_end);
11894                 err |= REFERENCER_MISSING;
11895         } else{
11896                 leaf = path.nodes[0];
11897                 bi = btrfs_item_ptr(leaf, path.slots[0],
11898                                     struct btrfs_block_group_item);
11899                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11900                                    sizeof(bg_item));
11901                 if (btrfs_block_group_flags(&bg_item) != type) {
11902                         error(
11903 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11904                                 chunk_key.offset, chunk_end, type,
11905                                 btrfs_block_group_flags(&bg_item));
11906                         err |= REFERENCER_MISSING;
11907                 }
11908         }
11909
11910         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11911         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11912         for (i = 0; i < num_stripes; i++) {
11913                 btrfs_release_path(&path);
11914                 btrfs_init_path(&path);
11915                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11916                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11917                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11918
11919                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11920                                         0, 0);
11921                 if (ret)
11922                         goto not_match_dev;
11923
11924                 leaf = path.nodes[0];
11925                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11926                                      struct btrfs_dev_extent);
11927                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11928                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11929                 if (objectid != chunk_key.objectid ||
11930                     offset != chunk_key.offset ||
11931                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11932                         goto not_match_dev;
11933                 continue;
11934 not_match_dev:
11935                 err |= BACKREF_MISSING;
11936                 error(
11937                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11938                         chunk_key.objectid, chunk_end, i);
11939                 continue;
11940         }
11941         btrfs_release_path(&path);
11942 out:
11943         return err;
11944 }
11945
11946 /*
11947  * Main entry function to check known items and update related accounting info
11948  */
11949 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11950 {
11951         struct btrfs_fs_info *fs_info = root->fs_info;
11952         struct btrfs_key key;
11953         int slot = 0;
11954         int type;
11955         struct btrfs_extent_data_ref *dref;
11956         int ret;
11957         int err = 0;
11958
11959 next:
11960         btrfs_item_key_to_cpu(eb, &key, slot);
11961         type = key.type;
11962
11963         switch (type) {
11964         case BTRFS_EXTENT_DATA_KEY:
11965                 ret = check_extent_data_item(root, eb, slot);
11966                 err |= ret;
11967                 break;
11968         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11969                 ret = check_block_group_item(fs_info, eb, slot);
11970                 err |= ret;
11971                 break;
11972         case BTRFS_DEV_ITEM_KEY:
11973                 ret = check_dev_item(fs_info, eb, slot);
11974                 err |= ret;
11975                 break;
11976         case BTRFS_CHUNK_ITEM_KEY:
11977                 ret = check_chunk_item(fs_info, eb, slot);
11978                 err |= ret;
11979                 break;
11980         case BTRFS_DEV_EXTENT_KEY:
11981                 ret = check_dev_extent_item(fs_info, eb, slot);
11982                 err |= ret;
11983                 break;
11984         case BTRFS_EXTENT_ITEM_KEY:
11985         case BTRFS_METADATA_ITEM_KEY:
11986                 ret = check_extent_item(fs_info, eb, slot);
11987                 err |= ret;
11988                 break;
11989         case BTRFS_EXTENT_CSUM_KEY:
11990                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11991                 break;
11992         case BTRFS_TREE_BLOCK_REF_KEY:
11993                 ret = check_tree_block_backref(fs_info, key.offset,
11994                                                key.objectid, -1);
11995                 err |= ret;
11996                 break;
11997         case BTRFS_EXTENT_DATA_REF_KEY:
11998                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11999                 ret = check_extent_data_backref(fs_info,
12000                                 btrfs_extent_data_ref_root(eb, dref),
12001                                 btrfs_extent_data_ref_objectid(eb, dref),
12002                                 btrfs_extent_data_ref_offset(eb, dref),
12003                                 key.objectid, 0,
12004                                 btrfs_extent_data_ref_count(eb, dref));
12005                 err |= ret;
12006                 break;
12007         case BTRFS_SHARED_BLOCK_REF_KEY:
12008                 ret = check_shared_block_backref(fs_info, key.offset,
12009                                                  key.objectid, -1);
12010                 err |= ret;
12011                 break;
12012         case BTRFS_SHARED_DATA_REF_KEY:
12013                 ret = check_shared_data_backref(fs_info, key.offset,
12014                                                 key.objectid);
12015                 err |= ret;
12016                 break;
12017         default:
12018                 break;
12019         }
12020
12021         if (++slot < btrfs_header_nritems(eb))
12022                 goto next;
12023
12024         return err;
12025 }
12026
12027 /*
12028  * Helper function for later fs/subvol tree check.  To determine if a tree
12029  * block should be checked.
 * This function ensures that a fs/subvolume tree block is checked only by
 * its direct referencer with the lowest rootid.
12032  *
12033  * Backref check at extent tree would detect errors like missing subvolume
12034  * tree, so we can do aggressive check to reduce duplicated checks.
12035  */
12036 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
12037 {
12038         struct btrfs_root *extent_root = root->fs_info->extent_root;
12039         struct btrfs_key key;
12040         struct btrfs_path path;
12041         struct extent_buffer *leaf;
12042         int slot;
12043         struct btrfs_extent_item *ei;
12044         unsigned long ptr;
12045         unsigned long end;
12046         int type;
12047         u32 item_size;
12048         u64 offset;
12049         struct btrfs_extent_inline_ref *iref;
12050         int ret;
12051
12052         btrfs_init_path(&path);
12053         key.objectid = btrfs_header_bytenr(eb);
12054         key.type = BTRFS_METADATA_ITEM_KEY;
12055         key.offset = (u64)-1;
12056
12057         /*
12058          * Any failure in backref resolving means we can't determine
12059          * whom the tree block belongs to.
12060          * So in that case, we need to check that tree block
12061          */
12062         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12063         if (ret < 0)
12064                 goto need_check;
12065
12066         ret = btrfs_previous_extent_item(extent_root, &path,
12067                                          btrfs_header_bytenr(eb));
12068         if (ret)
12069                 goto need_check;
12070
12071         leaf = path.nodes[0];
12072         slot = path.slots[0];
12073         btrfs_item_key_to_cpu(leaf, &key, slot);
12074         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12075
12076         if (key.type == BTRFS_METADATA_ITEM_KEY) {
12077                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12078         } else {
12079                 struct btrfs_tree_block_info *info;
12080
12081                 info = (struct btrfs_tree_block_info *)(ei + 1);
12082                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
12083         }
12084
12085         item_size = btrfs_item_size_nr(leaf, slot);
12086         ptr = (unsigned long)iref;
12087         end = (unsigned long)ei + item_size;
12088         while (ptr < end) {
12089                 iref = (struct btrfs_extent_inline_ref *)ptr;
12090                 type = btrfs_extent_inline_ref_type(leaf, iref);
12091                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
12092
12093                 /*
12094                  * We only check the tree block if current root is
12095                  * the lowest referencer of it.
12096                  */
12097                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
12098                     offset < root->objectid) {
12099                         btrfs_release_path(&path);
12100                         return 0;
12101                 }
12102
12103                 ptr += btrfs_extent_inline_ref_size(type);
12104         }
12105         /*
12106          * Normally we should also check keyed tree block ref, but that may be
12107          * very time consuming.  Inlined ref should already make us skip a lot
12108          * of refs now.  So skip search keyed tree block ref.
12109          */
12110
12111 need_check:
12112         btrfs_release_path(&path);
12113         return 1;
12114 }
12115
12116 /*
12117  * Traversal function for tree block. We will do:
12118  * 1) Skip shared fs/subvolume tree blocks
12119  * 2) Update related bytes accounting
12120  * 3) Pre-order traversal
12121  */
12122 static int traverse_tree_block(struct btrfs_root *root,
12123                                 struct extent_buffer *node)
12124 {
12125         struct extent_buffer *eb;
12126         struct btrfs_key key;
12127         struct btrfs_key drop_key;
12128         int level;
12129         u64 nr;
12130         int i;
12131         int err = 0;
12132         int ret;
12133
12134         /*
12135          * Skip shared fs/subvolume tree block, in that case they will
12136          * be checked by referencer with lowest rootid
12137          */
12138         if (is_fstree(root->objectid) && !should_check(root, node))
12139                 return 0;
12140
12141         /* Update bytes accounting */
12142         total_btree_bytes += node->len;
12143         if (fs_root_objectid(btrfs_header_owner(node)))
12144                 total_fs_tree_bytes += node->len;
12145         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
12146                 total_extent_tree_bytes += node->len;
12147
12148         /* pre-order tranversal, check itself first */
12149         level = btrfs_header_level(node);
12150         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
12151                                    btrfs_header_level(node),
12152                                    btrfs_header_owner(node));
12153         err |= ret;
12154         if (err)
12155                 error(
12156         "check %s failed root %llu bytenr %llu level %d, force continue check",
12157                         level ? "node":"leaf", root->objectid,
12158                         btrfs_header_bytenr(node), btrfs_header_level(node));
12159
12160         if (!level) {
12161                 btree_space_waste += btrfs_leaf_free_space(root, node);
12162                 ret = check_leaf_items(root, node);
12163                 err |= ret;
12164                 return err;
12165         }
12166
12167         nr = btrfs_header_nritems(node);
12168         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
12169         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
12170                 sizeof(struct btrfs_key_ptr);
12171
12172         /* Then check all its children */
12173         for (i = 0; i < nr; i++) {
12174                 u64 blocknr = btrfs_node_blockptr(node, i);
12175
12176                 btrfs_node_key_to_cpu(node, &key, i);
12177                 if (level == root->root_item.drop_level &&
12178                     is_dropped_key(&key, &drop_key))
12179                         continue;
12180
12181                 /*
12182                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
12183                  * to call the function itself.
12184                  */
12185                 eb = read_tree_block(root->fs_info, blocknr, 0);
12186                 if (extent_buffer_uptodate(eb)) {
12187                         ret = traverse_tree_block(root, eb);
12188                         err |= ret;
12189                 }
12190                 free_extent_buffer(eb);
12191         }
12192
12193         return err;
12194 }
12195
12196 /*
12197  * Low memory usage version check_chunks_and_extents.
12198  */
12199 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12200 {
12201         struct btrfs_path path;
12202         struct btrfs_key key;
12203         struct btrfs_root *root1;
12204         struct btrfs_root *root;
12205         struct btrfs_root *cur_root;
12206         int err = 0;
12207         int ret;
12208
12209         root = fs_info->fs_root;
12210
12211         root1 = root->fs_info->chunk_root;
12212         ret = traverse_tree_block(root1, root1->node);
12213         err |= ret;
12214
12215         root1 = root->fs_info->tree_root;
12216         ret = traverse_tree_block(root1, root1->node);
12217         err |= ret;
12218
12219         btrfs_init_path(&path);
12220         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12221         key.offset = 0;
12222         key.type = BTRFS_ROOT_ITEM_KEY;
12223
12224         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12225         if (ret) {
12226                 error("cannot find extent treet in tree_root");
12227                 goto out;
12228         }
12229
12230         while (1) {
12231                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12232                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12233                         goto next;
12234                 key.offset = (u64)-1;
12235
12236                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12237                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12238                                         &key);
12239                 else
12240                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
12241                 if (IS_ERR(cur_root) || !cur_root) {
12242                         error("failed to read tree: %lld", key.objectid);
12243                         goto next;
12244                 }
12245
12246                 ret = traverse_tree_block(cur_root, cur_root->node);
12247                 err |= ret;
12248
12249                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12250                         btrfs_free_fs_root(cur_root);
12251 next:
12252                 ret = btrfs_next_item(root1, &path);
12253                 if (ret)
12254                         goto out;
12255         }
12256
12257 out:
12258         btrfs_release_path(&path);
12259         return err;
12260 }
12261
12262 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12263 {
12264         int ret;
12265
12266         if (!ctx.progress_enabled)
12267                 fprintf(stderr, "checking extents\n");
12268         if (check_mode == CHECK_MODE_LOWMEM)
12269                 ret = check_chunks_and_extents_v2(fs_info);
12270         else
12271                 ret = check_chunks_and_extents(fs_info);
12272
12273         return ret;
12274 }
12275
12276 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12277                            struct btrfs_root *root, int overwrite)
12278 {
12279         struct extent_buffer *c;
12280         struct extent_buffer *old = root->node;
12281         int level;
12282         int ret;
12283         struct btrfs_disk_key disk_key = {0,0,0};
12284
12285         level = 0;
12286
12287         if (overwrite) {
12288                 c = old;
12289                 extent_buffer_get(c);
12290                 goto init;
12291         }
12292         c = btrfs_alloc_free_block(trans, root,
12293                                    root->fs_info->nodesize,
12294                                    root->root_key.objectid,
12295                                    &disk_key, level, 0, 0);
12296         if (IS_ERR(c)) {
12297                 c = old;
12298                 extent_buffer_get(c);
12299                 overwrite = 1;
12300         }
12301 init:
12302         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12303         btrfs_set_header_level(c, level);
12304         btrfs_set_header_bytenr(c, c->start);
12305         btrfs_set_header_generation(c, trans->transid);
12306         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12307         btrfs_set_header_owner(c, root->root_key.objectid);
12308
12309         write_extent_buffer(c, root->fs_info->fsid,
12310                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
12311
12312         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12313                             btrfs_header_chunk_tree_uuid(c),
12314                             BTRFS_UUID_SIZE);
12315
12316         btrfs_mark_buffer_dirty(c);
12317         /*
12318          * this case can happen in the following case:
12319          *
12320          * 1.overwrite previous root.
12321          *
12322          * 2.reinit reloc data root, this is because we skip pin
12323          * down reloc data tree before which means we can allocate
12324          * same block bytenr here.
12325          */
12326         if (old->start == c->start) {
12327                 btrfs_set_root_generation(&root->root_item,
12328                                           trans->transid);
12329                 root->root_item.level = btrfs_header_level(root->node);
12330                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12331                                         &root->root_key, &root->root_item);
12332                 if (ret) {
12333                         free_extent_buffer(c);
12334                         return ret;
12335                 }
12336         }
12337         free_extent_buffer(old);
12338         root->node = c;
12339         add_root_to_dirty_list(root);
12340         return 0;
12341 }
12342
12343 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12344                                 struct extent_buffer *eb, int tree_root)
12345 {
12346         struct extent_buffer *tmp;
12347         struct btrfs_root_item *ri;
12348         struct btrfs_key key;
12349         u64 bytenr;
12350         int level = btrfs_header_level(eb);
12351         int nritems;
12352         int ret;
12353         int i;
12354
12355         /*
12356          * If we have pinned this block before, don't pin it again.
12357          * This can not only avoid forever loop with broken filesystem
12358          * but also give us some speedups.
12359          */
12360         if (test_range_bit(&fs_info->pinned_extents, eb->start,
12361                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12362                 return 0;
12363
12364         btrfs_pin_extent(fs_info, eb->start, eb->len);
12365
12366         nritems = btrfs_header_nritems(eb);
12367         for (i = 0; i < nritems; i++) {
12368                 if (level == 0) {
12369                         btrfs_item_key_to_cpu(eb, &key, i);
12370                         if (key.type != BTRFS_ROOT_ITEM_KEY)
12371                                 continue;
12372                         /* Skip the extent root and reloc roots */
12373                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12374                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12375                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12376                                 continue;
12377                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12378                         bytenr = btrfs_disk_root_bytenr(eb, ri);
12379
12380                         /*
12381                          * If at any point we start needing the real root we
12382                          * will have to build a stump root for the root we are
12383                          * in, but for now this doesn't actually use the root so
12384                          * just pass in extent_root.
12385                          */
12386                         tmp = read_tree_block(fs_info, bytenr, 0);
12387                         if (!extent_buffer_uptodate(tmp)) {
12388                                 fprintf(stderr, "Error reading root block\n");
12389                                 return -EIO;
12390                         }
12391                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
12392                         free_extent_buffer(tmp);
12393                         if (ret)
12394                                 return ret;
12395                 } else {
12396                         bytenr = btrfs_node_blockptr(eb, i);
12397
12398                         /* If we aren't the tree root don't read the block */
12399                         if (level == 1 && !tree_root) {
12400                                 btrfs_pin_extent(fs_info, bytenr,
12401                                                 fs_info->nodesize);
12402                                 continue;
12403                         }
12404
12405                         tmp = read_tree_block(fs_info, bytenr, 0);
12406                         if (!extent_buffer_uptodate(tmp)) {
12407                                 fprintf(stderr, "Error reading tree block\n");
12408                                 return -EIO;
12409                         }
12410                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12411                         free_extent_buffer(tmp);
12412                         if (ret)
12413                                 return ret;
12414                 }
12415         }
12416
12417         return 0;
12418 }
12419
12420 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12421 {
12422         int ret;
12423
12424         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12425         if (ret)
12426                 return ret;
12427
12428         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
12429 }
12430
12431 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12432 {
12433         struct btrfs_block_group_cache *cache;
12434         struct btrfs_path path;
12435         struct extent_buffer *leaf;
12436         struct btrfs_chunk *chunk;
12437         struct btrfs_key key;
12438         int ret;
12439         u64 start;
12440
12441         btrfs_init_path(&path);
12442         key.objectid = 0;
12443         key.type = BTRFS_CHUNK_ITEM_KEY;
12444         key.offset = 0;
12445         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12446         if (ret < 0) {
12447                 btrfs_release_path(&path);
12448                 return ret;
12449         }
12450
12451         /*
12452          * We do this in case the block groups were screwed up and had alloc
12453          * bits that aren't actually set on the chunks.  This happens with
12454          * restored images every time and could happen in real life I guess.
12455          */
12456         fs_info->avail_data_alloc_bits = 0;
12457         fs_info->avail_metadata_alloc_bits = 0;
12458         fs_info->avail_system_alloc_bits = 0;
12459
12460         /* First we need to create the in-memory block groups */
12461         while (1) {
12462                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12463                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12464                         if (ret < 0) {
12465                                 btrfs_release_path(&path);
12466                                 return ret;
12467                         }
12468                         if (ret) {
12469                                 ret = 0;
12470                                 break;
12471                         }
12472                 }
12473                 leaf = path.nodes[0];
12474                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12475                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12476                         path.slots[0]++;
12477                         continue;
12478                 }
12479
12480                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12481                 btrfs_add_block_group(fs_info, 0,
12482                                       btrfs_chunk_type(leaf, chunk),
12483                                       key.objectid, key.offset,
12484                                       btrfs_chunk_length(leaf, chunk));
12485                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12486                                  key.offset + btrfs_chunk_length(leaf, chunk));
12487                 path.slots[0]++;
12488         }
12489         start = 0;
12490         while (1) {
12491                 cache = btrfs_lookup_first_block_group(fs_info, start);
12492                 if (!cache)
12493                         break;
12494                 cache->cached = 1;
12495                 start = cache->key.objectid + cache->key.offset;
12496         }
12497
12498         btrfs_release_path(&path);
12499         return 0;
12500 }
12501
12502 static int reset_balance(struct btrfs_trans_handle *trans,
12503                          struct btrfs_fs_info *fs_info)
12504 {
12505         struct btrfs_root *root = fs_info->tree_root;
12506         struct btrfs_path path;
12507         struct extent_buffer *leaf;
12508         struct btrfs_key key;
12509         int del_slot, del_nr = 0;
12510         int ret;
12511         int found = 0;
12512
12513         btrfs_init_path(&path);
12514         key.objectid = BTRFS_BALANCE_OBJECTID;
12515         key.type = BTRFS_BALANCE_ITEM_KEY;
12516         key.offset = 0;
12517         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12518         if (ret) {
12519                 if (ret > 0)
12520                         ret = 0;
12521                 if (!ret)
12522                         goto reinit_data_reloc;
12523                 else
12524                         goto out;
12525         }
12526
12527         ret = btrfs_del_item(trans, root, &path);
12528         if (ret)
12529                 goto out;
12530         btrfs_release_path(&path);
12531
12532         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12533         key.type = BTRFS_ROOT_ITEM_KEY;
12534         key.offset = 0;
12535         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12536         if (ret < 0)
12537                 goto out;
12538         while (1) {
12539                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12540                         if (!found)
12541                                 break;
12542
12543                         if (del_nr) {
12544                                 ret = btrfs_del_items(trans, root, &path,
12545                                                       del_slot, del_nr);
12546                                 del_nr = 0;
12547                                 if (ret)
12548                                         goto out;
12549                         }
12550                         key.offset++;
12551                         btrfs_release_path(&path);
12552
12553                         found = 0;
12554                         ret = btrfs_search_slot(trans, root, &key, &path,
12555                                                 -1, 1);
12556                         if (ret < 0)
12557                                 goto out;
12558                         continue;
12559                 }
12560                 found = 1;
12561                 leaf = path.nodes[0];
12562                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12563                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12564                         break;
12565                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12566                         path.slots[0]++;
12567                         continue;
12568                 }
12569                 if (!del_nr) {
12570                         del_slot = path.slots[0];
12571                         del_nr = 1;
12572                 } else {
12573                         del_nr++;
12574                 }
12575                 path.slots[0]++;
12576         }
12577
12578         if (del_nr) {
12579                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12580                 if (ret)
12581                         goto out;
12582         }
12583         btrfs_release_path(&path);
12584
12585 reinit_data_reloc:
12586         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12587         key.type = BTRFS_ROOT_ITEM_KEY;
12588         key.offset = (u64)-1;
12589         root = btrfs_read_fs_root(fs_info, &key);
12590         if (IS_ERR(root)) {
12591                 fprintf(stderr, "Error reading data reloc tree\n");
12592                 ret = PTR_ERR(root);
12593                 goto out;
12594         }
12595         record_root_in_trans(trans, root);
12596         ret = btrfs_fsck_reinit_root(trans, root, 0);
12597         if (ret)
12598                 goto out;
12599         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12600 out:
12601         btrfs_release_path(&path);
12602         return ret;
12603 }
12604
12605 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12606                               struct btrfs_fs_info *fs_info)
12607 {
12608         u64 start = 0;
12609         int ret;
12610
12611         /*
12612          * The only reason we don't do this is because right now we're just
12613          * walking the trees we find and pinning down their bytes, we don't look
12614          * at any of the leaves.  In order to do mixed groups we'd have to check
12615          * the leaves of any fs roots and pin down the bytes for any file
12616          * extents we find.  Not hard but why do it if we don't have to?
12617          */
12618         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12619                 fprintf(stderr, "We don't support re-initing the extent tree "
12620                         "for mixed block groups yet, please notify a btrfs "
12621                         "developer you want to do this so they can add this "
12622                         "functionality.\n");
12623                 return -EINVAL;
12624         }
12625
12626         /*
12627          * first we need to walk all of the trees except the extent tree and pin
12628          * down the bytes that are in use so we don't overwrite any existing
12629          * metadata.
12630          */
12631         ret = pin_metadata_blocks(fs_info);
12632         if (ret) {
12633                 fprintf(stderr, "error pinning down used bytes\n");
12634                 return ret;
12635         }
12636
12637         /*
12638          * Need to drop all the block groups since we're going to recreate all
12639          * of them again.
12640          */
12641         btrfs_free_block_groups(fs_info);
12642         ret = reset_block_groups(fs_info);
12643         if (ret) {
12644                 fprintf(stderr, "error resetting the block groups\n");
12645                 return ret;
12646         }
12647
12648         /* Ok we can allocate now, reinit the extent root */
12649         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12650         if (ret) {
12651                 fprintf(stderr, "extent root initialization failed\n");
12652                 /*
12653                  * When the transaction code is updated we should end the
12654                  * transaction, but for now progs only knows about commit so
12655                  * just return an error.
12656                  */
12657                 return ret;
12658         }
12659
12660         /*
12661          * Now we have all the in-memory block groups setup so we can make
12662          * allocations properly, and the metadata we care about is safe since we
12663          * pinned all of it above.
12664          */
12665         while (1) {
12666                 struct btrfs_block_group_cache *cache;
12667
12668                 cache = btrfs_lookup_first_block_group(fs_info, start);
12669                 if (!cache)
12670                         break;
12671                 start = cache->key.objectid + cache->key.offset;
12672                 ret = btrfs_insert_item(trans, fs_info->extent_root,
12673                                         &cache->key, &cache->item,
12674                                         sizeof(cache->item));
12675                 if (ret) {
12676                         fprintf(stderr, "Error adding block group\n");
12677                         return ret;
12678                 }
12679                 btrfs_extent_post_op(trans, fs_info->extent_root);
12680         }
12681
12682         ret = reset_balance(trans, fs_info);
12683         if (ret)
12684                 fprintf(stderr, "error resetting the pending balance\n");
12685
12686         return ret;
12687 }
12688
12689 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12690 {
12691         struct btrfs_path path;
12692         struct btrfs_trans_handle *trans;
12693         struct btrfs_key key;
12694         int ret;
12695
12696         printf("Recowing metadata block %llu\n", eb->start);
12697         key.objectid = btrfs_header_owner(eb);
12698         key.type = BTRFS_ROOT_ITEM_KEY;
12699         key.offset = (u64)-1;
12700
12701         root = btrfs_read_fs_root(root->fs_info, &key);
12702         if (IS_ERR(root)) {
12703                 fprintf(stderr, "Couldn't find owner root %llu\n",
12704                         key.objectid);
12705                 return PTR_ERR(root);
12706         }
12707
12708         trans = btrfs_start_transaction(root, 1);
12709         if (IS_ERR(trans))
12710                 return PTR_ERR(trans);
12711
12712         btrfs_init_path(&path);
12713         path.lowest_level = btrfs_header_level(eb);
12714         if (path.lowest_level)
12715                 btrfs_node_key_to_cpu(eb, &key, 0);
12716         else
12717                 btrfs_item_key_to_cpu(eb, &key, 0);
12718
12719         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12720         btrfs_commit_transaction(trans, root);
12721         btrfs_release_path(&path);
12722         return ret;
12723 }
12724
12725 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12726 {
12727         struct btrfs_path path;
12728         struct btrfs_trans_handle *trans;
12729         struct btrfs_key key;
12730         int ret;
12731
12732         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12733                bad->key.type, bad->key.offset);
12734         key.objectid = bad->root_id;
12735         key.type = BTRFS_ROOT_ITEM_KEY;
12736         key.offset = (u64)-1;
12737
12738         root = btrfs_read_fs_root(root->fs_info, &key);
12739         if (IS_ERR(root)) {
12740                 fprintf(stderr, "Couldn't find owner root %llu\n",
12741                         key.objectid);
12742                 return PTR_ERR(root);
12743         }
12744
12745         trans = btrfs_start_transaction(root, 1);
12746         if (IS_ERR(trans))
12747                 return PTR_ERR(trans);
12748
12749         btrfs_init_path(&path);
12750         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12751         if (ret) {
12752                 if (ret > 0)
12753                         ret = 0;
12754                 goto out;
12755         }
12756         ret = btrfs_del_item(trans, root, &path);
12757 out:
12758         btrfs_commit_transaction(trans, root);
12759         btrfs_release_path(&path);
12760         return ret;
12761 }
12762
12763 static int zero_log_tree(struct btrfs_root *root)
12764 {
12765         struct btrfs_trans_handle *trans;
12766         int ret;
12767
12768         trans = btrfs_start_transaction(root, 1);
12769         if (IS_ERR(trans)) {
12770                 ret = PTR_ERR(trans);
12771                 return ret;
12772         }
12773         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12774         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12775         ret = btrfs_commit_transaction(trans, root);
12776         return ret;
12777 }
12778
12779 static int populate_csum(struct btrfs_trans_handle *trans,
12780                          struct btrfs_root *csum_root, char *buf, u64 start,
12781                          u64 len)
12782 {
12783         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12784         u64 offset = 0;
12785         u64 sectorsize;
12786         int ret = 0;
12787
12788         while (offset < len) {
12789                 sectorsize = fs_info->sectorsize;
12790                 ret = read_extent_data(fs_info, buf, start + offset,
12791                                        &sectorsize, 0);
12792                 if (ret)
12793                         break;
12794                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12795                                             start + offset, buf, sectorsize);
12796                 if (ret)
12797                         break;
12798                 offset += sectorsize;
12799         }
12800         return ret;
12801 }
12802
12803 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12804                                       struct btrfs_root *csum_root,
12805                                       struct btrfs_root *cur_root)
12806 {
12807         struct btrfs_path path;
12808         struct btrfs_key key;
12809         struct extent_buffer *node;
12810         struct btrfs_file_extent_item *fi;
12811         char *buf = NULL;
12812         u64 start = 0;
12813         u64 len = 0;
12814         int slot = 0;
12815         int ret = 0;
12816
12817         buf = malloc(cur_root->fs_info->sectorsize);
12818         if (!buf)
12819                 return -ENOMEM;
12820
12821         btrfs_init_path(&path);
12822         key.objectid = 0;
12823         key.offset = 0;
12824         key.type = 0;
12825         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12826         if (ret < 0)
12827                 goto out;
12828         /* Iterate all regular file extents and fill its csum */
12829         while (1) {
12830                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12831
12832                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12833                         goto next;
12834                 node = path.nodes[0];
12835                 slot = path.slots[0];
12836                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12837                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12838                         goto next;
12839                 start = btrfs_file_extent_disk_bytenr(node, fi);
12840                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12841
12842                 ret = populate_csum(trans, csum_root, buf, start, len);
12843                 if (ret == -EEXIST)
12844                         ret = 0;
12845                 if (ret < 0)
12846                         goto out;
12847 next:
12848                 /*
12849                  * TODO: if next leaf is corrupted, jump to nearest next valid
12850                  * leaf.
12851                  */
12852                 ret = btrfs_next_item(cur_root, &path);
12853                 if (ret < 0)
12854                         goto out;
12855                 if (ret > 0) {
12856                         ret = 0;
12857                         goto out;
12858                 }
12859         }
12860
12861 out:
12862         btrfs_release_path(&path);
12863         free(buf);
12864         return ret;
12865 }
12866
12867 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12868                                   struct btrfs_root *csum_root)
12869 {
12870         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12871         struct btrfs_path path;
12872         struct btrfs_root *tree_root = fs_info->tree_root;
12873         struct btrfs_root *cur_root;
12874         struct extent_buffer *node;
12875         struct btrfs_key key;
12876         int slot = 0;
12877         int ret = 0;
12878
12879         btrfs_init_path(&path);
12880         key.objectid = BTRFS_FS_TREE_OBJECTID;
12881         key.offset = 0;
12882         key.type = BTRFS_ROOT_ITEM_KEY;
12883         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12884         if (ret < 0)
12885                 goto out;
12886         if (ret > 0) {
12887                 ret = -ENOENT;
12888                 goto out;
12889         }
12890
12891         while (1) {
12892                 node = path.nodes[0];
12893                 slot = path.slots[0];
12894                 btrfs_item_key_to_cpu(node, &key, slot);
12895                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12896                         goto out;
12897                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12898                         goto next;
12899                 if (!is_fstree(key.objectid))
12900                         goto next;
12901                 key.offset = (u64)-1;
12902
12903                 cur_root = btrfs_read_fs_root(fs_info, &key);
12904                 if (IS_ERR(cur_root) || !cur_root) {
12905                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12906                                 key.objectid);
12907                         goto out;
12908                 }
12909                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12910                                 cur_root);
12911                 if (ret < 0)
12912                         goto out;
12913 next:
12914                 ret = btrfs_next_item(tree_root, &path);
12915                 if (ret > 0) {
12916                         ret = 0;
12917                         goto out;
12918                 }
12919                 if (ret < 0)
12920                         goto out;
12921         }
12922
12923 out:
12924         btrfs_release_path(&path);
12925         return ret;
12926 }
12927
12928 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12929                                       struct btrfs_root *csum_root)
12930 {
12931         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12932         struct btrfs_path path;
12933         struct btrfs_extent_item *ei;
12934         struct extent_buffer *leaf;
12935         char *buf;
12936         struct btrfs_key key;
12937         int ret;
12938
12939         btrfs_init_path(&path);
12940         key.objectid = 0;
12941         key.type = BTRFS_EXTENT_ITEM_KEY;
12942         key.offset = 0;
12943         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12944         if (ret < 0) {
12945                 btrfs_release_path(&path);
12946                 return ret;
12947         }
12948
12949         buf = malloc(csum_root->fs_info->sectorsize);
12950         if (!buf) {
12951                 btrfs_release_path(&path);
12952                 return -ENOMEM;
12953         }
12954
12955         while (1) {
12956                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12957                         ret = btrfs_next_leaf(extent_root, &path);
12958                         if (ret < 0)
12959                                 break;
12960                         if (ret) {
12961                                 ret = 0;
12962                                 break;
12963                         }
12964                 }
12965                 leaf = path.nodes[0];
12966
12967                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12968                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12969                         path.slots[0]++;
12970                         continue;
12971                 }
12972
12973                 ei = btrfs_item_ptr(leaf, path.slots[0],
12974                                     struct btrfs_extent_item);
12975                 if (!(btrfs_extent_flags(leaf, ei) &
12976                       BTRFS_EXTENT_FLAG_DATA)) {
12977                         path.slots[0]++;
12978                         continue;
12979                 }
12980
12981                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12982                                     key.offset);
12983                 if (ret)
12984                         break;
12985                 path.slots[0]++;
12986         }
12987
12988         btrfs_release_path(&path);
12989         free(buf);
12990         return ret;
12991 }
12992
/*
 * Recompute all data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes out all extent info, so in that case the
 * extent tree cannot be used as the source of data extents.  When
 * search_fs_tree is non-zero the fs/subvolume trees are scanned instead;
 * otherwise the extent tree is used.
 *
 * Returns whatever the selected worker returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
13009
13010 static void free_roots_info_cache(void)
13011 {
13012         if (!roots_info_cache)
13013                 return;
13014
13015         while (!cache_tree_empty(roots_info_cache)) {
13016                 struct cache_extent *entry;
13017                 struct root_item_info *rii;
13018
13019                 entry = first_cache_extent(roots_info_cache);
13020                 if (!entry)
13021                         break;
13022                 remove_cache_extent(roots_info_cache, entry);
13023                 rii = container_of(entry, struct root_item_info, cache_extent);
13024                 free(rii);
13025         }
13026
13027         free(roots_info_cache);
13028         roots_info_cache = NULL;
13029 }
13030
13031 static int build_roots_info_cache(struct btrfs_fs_info *info)
13032 {
13033         int ret = 0;
13034         struct btrfs_key key;
13035         struct extent_buffer *leaf;
13036         struct btrfs_path path;
13037
13038         if (!roots_info_cache) {
13039                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13040                 if (!roots_info_cache)
13041                         return -ENOMEM;
13042                 cache_tree_init(roots_info_cache);
13043         }
13044
13045         btrfs_init_path(&path);
13046         key.objectid = 0;
13047         key.type = BTRFS_EXTENT_ITEM_KEY;
13048         key.offset = 0;
13049         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13050         if (ret < 0)
13051                 goto out;
13052         leaf = path.nodes[0];
13053
13054         while (1) {
13055                 struct btrfs_key found_key;
13056                 struct btrfs_extent_item *ei;
13057                 struct btrfs_extent_inline_ref *iref;
13058                 int slot = path.slots[0];
13059                 int type;
13060                 u64 flags;
13061                 u64 root_id;
13062                 u8 level;
13063                 struct cache_extent *entry;
13064                 struct root_item_info *rii;
13065
13066                 if (slot >= btrfs_header_nritems(leaf)) {
13067                         ret = btrfs_next_leaf(info->extent_root, &path);
13068                         if (ret < 0) {
13069                                 break;
13070                         } else if (ret) {
13071                                 ret = 0;
13072                                 break;
13073                         }
13074                         leaf = path.nodes[0];
13075                         slot = path.slots[0];
13076                 }
13077
13078                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13079
13080                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13081                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13082                         goto next;
13083
13084                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13085                 flags = btrfs_extent_flags(leaf, ei);
13086
13087                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13088                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13089                         goto next;
13090
13091                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13092                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13093                         level = found_key.offset;
13094                 } else {
13095                         struct btrfs_tree_block_info *binfo;
13096
13097                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13098                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13099                         level = btrfs_tree_block_level(leaf, binfo);
13100                 }
13101
13102                 /*
13103                  * For a root extent, it must be of the following type and the
13104                  * first (and only one) iref in the item.
13105                  */
13106                 type = btrfs_extent_inline_ref_type(leaf, iref);
13107                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13108                         goto next;
13109
13110                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13111                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13112                 if (!entry) {
13113                         rii = malloc(sizeof(struct root_item_info));
13114                         if (!rii) {
13115                                 ret = -ENOMEM;
13116                                 goto out;
13117                         }
13118                         rii->cache_extent.start = root_id;
13119                         rii->cache_extent.size = 1;
13120                         rii->level = (u8)-1;
13121                         entry = &rii->cache_extent;
13122                         ret = insert_cache_extent(roots_info_cache, entry);
13123                         ASSERT(ret == 0);
13124                 } else {
13125                         rii = container_of(entry, struct root_item_info,
13126                                            cache_extent);
13127                 }
13128
13129                 ASSERT(rii->cache_extent.start == root_id);
13130                 ASSERT(rii->cache_extent.size == 1);
13131
13132                 if (level > rii->level || rii->level == (u8)-1) {
13133                         rii->level = level;
13134                         rii->bytenr = found_key.objectid;
13135                         rii->gen = btrfs_extent_generation(leaf, ei);
13136                         rii->node_count = 1;
13137                 } else if (level == rii->level) {
13138                         rii->node_count++;
13139                 }
13140 next:
13141                 path.slots[0]++;
13142         }
13143
13144 out:
13145         btrfs_release_path(&path);
13146
13147         return ret;
13148 }
13149
13150 static int maybe_repair_root_item(struct btrfs_path *path,
13151                                   const struct btrfs_key *root_key,
13152                                   const int read_only_mode)
13153 {
13154         const u64 root_id = root_key->objectid;
13155         struct cache_extent *entry;
13156         struct root_item_info *rii;
13157         struct btrfs_root_item ri;
13158         unsigned long offset;
13159
13160         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13161         if (!entry) {
13162                 fprintf(stderr,
13163                         "Error: could not find extent items for root %llu\n",
13164                         root_key->objectid);
13165                 return -ENOENT;
13166         }
13167
13168         rii = container_of(entry, struct root_item_info, cache_extent);
13169         ASSERT(rii->cache_extent.start == root_id);
13170         ASSERT(rii->cache_extent.size == 1);
13171
13172         if (rii->node_count != 1) {
13173                 fprintf(stderr,
13174                         "Error: could not find btree root extent for root %llu\n",
13175                         root_id);
13176                 return -ENOENT;
13177         }
13178
13179         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13180         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13181
13182         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13183             btrfs_root_level(&ri) != rii->level ||
13184             btrfs_root_generation(&ri) != rii->gen) {
13185
13186                 /*
13187                  * If we're in repair mode but our caller told us to not update
13188                  * the root item, i.e. just check if it needs to be updated, don't
13189                  * print this message, since the caller will call us again shortly
13190                  * for the same root item without read only mode (the caller will
13191                  * open a transaction first).
13192                  */
13193                 if (!(read_only_mode && repair))
13194                         fprintf(stderr,
13195                                 "%sroot item for root %llu,"
13196                                 " current bytenr %llu, current gen %llu, current level %u,"
13197                                 " new bytenr %llu, new gen %llu, new level %u\n",
13198                                 (read_only_mode ? "" : "fixing "),
13199                                 root_id,
13200                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13201                                 btrfs_root_level(&ri),
13202                                 rii->bytenr, rii->gen, rii->level);
13203
13204                 if (btrfs_root_generation(&ri) > rii->gen) {
13205                         fprintf(stderr,
13206                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13207                                 root_id, btrfs_root_generation(&ri), rii->gen);
13208                         return -EINVAL;
13209                 }
13210
13211                 if (!read_only_mode) {
13212                         btrfs_set_root_bytenr(&ri, rii->bytenr);
13213                         btrfs_set_root_level(&ri, rii->level);
13214                         btrfs_set_root_generation(&ri, rii->gen);
13215                         write_extent_buffer(path->nodes[0], &ri,
13216                                             offset, sizeof(ri));
13217                 }
13218
13219                 return 1;
13220         }
13221
13222         return 0;
13223 }
13224
13225 /*
13226  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13227  * caused read-only snapshots to be corrupted if they were created at a moment
13228  * when the source subvolume/snapshot had orphan items. The issue was that the
13229  * on-disk root items became incorrect, referring to the pre orphan cleanup root
13230  * node instead of the post orphan cleanup root node.
13231  * So this function, and its callees, just detects and fixes those cases. Even
13232  * though the regression was for read-only snapshots, this function applies to
13233  * any snapshot/subvolume root.
13234  * This must be run before any other repair code - not doing it so, makes other
13235  * repair code delete or modify backrefs in the extent tree for example, which
13236  * will result in an inconsistent fs after repairing the root items.
13237  */
13238 static int repair_root_items(struct btrfs_fs_info *info)
13239 {
13240         struct btrfs_path path;
13241         struct btrfs_key key;
13242         struct extent_buffer *leaf;
13243         struct btrfs_trans_handle *trans = NULL;
13244         int ret = 0;
13245         int bad_roots = 0;
13246         int need_trans = 0;
13247
13248         btrfs_init_path(&path);
13249
13250         ret = build_roots_info_cache(info);
13251         if (ret)
13252                 goto out;
13253
13254         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13255         key.type = BTRFS_ROOT_ITEM_KEY;
13256         key.offset = 0;
13257
13258 again:
13259         /*
13260          * Avoid opening and committing transactions if a leaf doesn't have
13261          * any root items that need to be fixed, so that we avoid rotating
13262          * backup roots unnecessarily.
13263          */
13264         if (need_trans) {
13265                 trans = btrfs_start_transaction(info->tree_root, 1);
13266                 if (IS_ERR(trans)) {
13267                         ret = PTR_ERR(trans);
13268                         goto out;
13269                 }
13270         }
13271
13272         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13273                                 0, trans ? 1 : 0);
13274         if (ret < 0)
13275                 goto out;
13276         leaf = path.nodes[0];
13277
13278         while (1) {
13279                 struct btrfs_key found_key;
13280
13281                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13282                         int no_more_keys = find_next_key(&path, &key);
13283
13284                         btrfs_release_path(&path);
13285                         if (trans) {
13286                                 ret = btrfs_commit_transaction(trans,
13287                                                                info->tree_root);
13288                                 trans = NULL;
13289                                 if (ret < 0)
13290                                         goto out;
13291                         }
13292                         need_trans = 0;
13293                         if (no_more_keys)
13294                                 break;
13295                         goto again;
13296                 }
13297
13298                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13299
13300                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13301                         goto next;
13302                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13303                         goto next;
13304
13305                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13306                 if (ret < 0)
13307                         goto out;
13308                 if (ret) {
13309                         if (!trans && repair) {
13310                                 need_trans = 1;
13311                                 key = found_key;
13312                                 btrfs_release_path(&path);
13313                                 goto again;
13314                         }
13315                         bad_roots++;
13316                 }
13317 next:
13318                 path.slots[0]++;
13319         }
13320         ret = 0;
13321 out:
13322         free_roots_info_cache();
13323         btrfs_release_path(&path);
13324         if (trans)
13325                 btrfs_commit_transaction(trans, info->tree_root);
13326         if (ret < 0)
13327                 return ret;
13328
13329         return bad_roots;
13330 }
13331
13332 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13333 {
13334         struct btrfs_trans_handle *trans;
13335         struct btrfs_block_group_cache *bg_cache;
13336         u64 current = 0;
13337         int ret = 0;
13338
13339         /* Clear all free space cache inodes and its extent data */
13340         while (1) {
13341                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13342                 if (!bg_cache)
13343                         break;
13344                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13345                 if (ret < 0)
13346                         return ret;
13347                 current = bg_cache->key.objectid + bg_cache->key.offset;
13348         }
13349
13350         /* Don't forget to set cache_generation to -1 */
13351         trans = btrfs_start_transaction(fs_info->tree_root, 0);
13352         if (IS_ERR(trans)) {
13353                 error("failed to update super block cache generation");
13354                 return PTR_ERR(trans);
13355         }
13356         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13357         btrfs_commit_transaction(trans, fs_info->tree_root);
13358
13359         return ret;
13360 }
13361
13362 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13363                 int clear_version)
13364 {
13365         int ret = 0;
13366
13367         if (clear_version == 1) {
13368                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13369                         error(
13370                 "free space cache v2 detected, use --clear-space-cache v2");
13371                         ret = 1;
13372                         goto close_out;
13373                 }
13374                 printf("Clearing free space cache\n");
13375                 ret = clear_free_space_cache(fs_info);
13376                 if (ret) {
13377                         error("failed to clear free space cache");
13378                         ret = 1;
13379                 } else {
13380                         printf("Free space cache cleared\n");
13381                 }
13382         } else if (clear_version == 2) {
13383                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13384                         printf("no free space cache v2 to clear\n");
13385                         ret = 0;
13386                         goto close_out;
13387                 }
13388                 printf("Clear free space cache v2\n");
13389                 ret = btrfs_clear_free_space_tree(fs_info);
13390                 if (ret) {
13391                         error("failed to clear free space cache v2: %d", ret);
13392                         ret = 1;
13393                 } else {
13394                         printf("free space cache v2 cleared\n");
13395                 }
13396         }
13397 close_out:
13398         return ret;
13399 }
13400
/*
 * Help text for "btrfs check": a NULL-terminated array of lines that
 * cmd_check() hands to usage().  The first entries are the synopsis and the
 * short/long descriptions; the entries after the empty string document the
 * individual options.
 * NOTE(review): how usage() splits synopsis/description/options is defined
 * outside this section - confirm before reordering the leading entries.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
13432
13433 int cmd_check(int argc, char **argv)
13434 {
13435         struct cache_tree root_cache;
13436         struct btrfs_root *root;
13437         struct btrfs_fs_info *info;
13438         u64 bytenr = 0;
13439         u64 subvolid = 0;
13440         u64 tree_root_bytenr = 0;
13441         u64 chunk_root_bytenr = 0;
13442         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13443         int ret = 0;
13444         int err = 0;
13445         u64 num;
13446         int init_csum_tree = 0;
13447         int readonly = 0;
13448         int clear_space_cache = 0;
13449         int qgroup_report = 0;
13450         int qgroups_repaired = 0;
13451         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13452         int force = 0;
13453
13454         while(1) {
13455                 int c;
13456                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13457                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13458                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13459                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13460                         GETOPT_VAL_FORCE };
13461                 static const struct option long_options[] = {
13462                         { "super", required_argument, NULL, 's' },
13463                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13464                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13465                         { "init-csum-tree", no_argument, NULL,
13466                                 GETOPT_VAL_INIT_CSUM },
13467                         { "init-extent-tree", no_argument, NULL,
13468                                 GETOPT_VAL_INIT_EXTENT },
13469                         { "check-data-csum", no_argument, NULL,
13470                                 GETOPT_VAL_CHECK_CSUM },
13471                         { "backup", no_argument, NULL, 'b' },
13472                         { "subvol-extents", required_argument, NULL, 'E' },
13473                         { "qgroup-report", no_argument, NULL, 'Q' },
13474                         { "tree-root", required_argument, NULL, 'r' },
13475                         { "chunk-root", required_argument, NULL,
13476                                 GETOPT_VAL_CHUNK_TREE },
13477                         { "progress", no_argument, NULL, 'p' },
13478                         { "mode", required_argument, NULL,
13479                                 GETOPT_VAL_MODE },
13480                         { "clear-space-cache", required_argument, NULL,
13481                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
13482                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13483                         { NULL, 0, NULL, 0}
13484                 };
13485
13486                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13487                 if (c < 0)
13488                         break;
13489                 switch(c) {
13490                         case 'a': /* ignored */ break;
13491                         case 'b':
13492                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13493                                 break;
13494                         case 's':
13495                                 num = arg_strtou64(optarg);
13496                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13497                                         error(
13498                                         "super mirror should be less than %d",
13499                                                 BTRFS_SUPER_MIRROR_MAX);
13500                                         exit(1);
13501                                 }
13502                                 bytenr = btrfs_sb_offset(((int)num));
13503                                 printf("using SB copy %llu, bytenr %llu\n", num,
13504                                        (unsigned long long)bytenr);
13505                                 break;
13506                         case 'Q':
13507                                 qgroup_report = 1;
13508                                 break;
13509                         case 'E':
13510                                 subvolid = arg_strtou64(optarg);
13511                                 break;
13512                         case 'r':
13513                                 tree_root_bytenr = arg_strtou64(optarg);
13514                                 break;
13515                         case GETOPT_VAL_CHUNK_TREE:
13516                                 chunk_root_bytenr = arg_strtou64(optarg);
13517                                 break;
13518                         case 'p':
13519                                 ctx.progress_enabled = true;
13520                                 break;
13521                         case '?':
13522                         case 'h':
13523                                 usage(cmd_check_usage);
13524                         case GETOPT_VAL_REPAIR:
13525                                 printf("enabling repair mode\n");
13526                                 repair = 1;
13527                                 ctree_flags |= OPEN_CTREE_WRITES;
13528                                 break;
13529                         case GETOPT_VAL_READONLY:
13530                                 readonly = 1;
13531                                 break;
13532                         case GETOPT_VAL_INIT_CSUM:
13533                                 printf("Creating a new CRC tree\n");
13534                                 init_csum_tree = 1;
13535                                 repair = 1;
13536                                 ctree_flags |= OPEN_CTREE_WRITES;
13537                                 break;
13538                         case GETOPT_VAL_INIT_EXTENT:
13539                                 init_extent_tree = 1;
13540                                 ctree_flags |= (OPEN_CTREE_WRITES |
13541                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
13542                                 repair = 1;
13543                                 break;
13544                         case GETOPT_VAL_CHECK_CSUM:
13545                                 check_data_csum = 1;
13546                                 break;
13547                         case GETOPT_VAL_MODE:
13548                                 check_mode = parse_check_mode(optarg);
13549                                 if (check_mode == CHECK_MODE_UNKNOWN) {
13550                                         error("unknown mode: %s", optarg);
13551                                         exit(1);
13552                                 }
13553                                 break;
13554                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
13555                                 if (strcmp(optarg, "v1") == 0) {
13556                                         clear_space_cache = 1;
13557                                 } else if (strcmp(optarg, "v2") == 0) {
13558                                         clear_space_cache = 2;
13559                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13560                                 } else {
13561                                         error(
13562                 "invalid argument to --clear-space-cache, must be v1 or v2");
13563                                         exit(1);
13564                                 }
13565                                 ctree_flags |= OPEN_CTREE_WRITES;
13566                                 break;
13567                         case GETOPT_VAL_FORCE:
13568                                 force = 1;
13569                                 break;
13570                 }
13571         }
13572
13573         if (check_argc_exact(argc - optind, 1))
13574                 usage(cmd_check_usage);
13575
13576         if (ctx.progress_enabled) {
13577                 ctx.tp = TASK_NOTHING;
13578                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13579         }
13580
13581         /* This check is the only reason for --readonly to exist */
13582         if (readonly && repair) {
13583                 error("repair options are not compatible with --readonly");
13584                 exit(1);
13585         }
13586
13587         /*
13588          * experimental and dangerous
13589          */
13590         if (repair && check_mode == CHECK_MODE_LOWMEM)
13591                 warning("low-memory mode repair support is only partial");
13592
13593         radix_tree_init();
13594         cache_tree_init(&root_cache);
13595
13596         ret = check_mounted(argv[optind]);
13597         if (!force) {
13598                 if (ret < 0) {
13599                         error("could not check mount status: %s",
13600                                         strerror(-ret));
13601                         err |= !!ret;
13602                         goto err_out;
13603                 } else if (ret) {
13604                         error(
13605 "%s is currently mounted, use --force if you really intend to check the filesystem",
13606                                 argv[optind]);
13607                         ret = -EBUSY;
13608                         err |= !!ret;
13609                         goto err_out;
13610                 }
13611         } else {
13612                 if (repair) {
13613                         error("repair and --force is not yet supported");
13614                         ret = 1;
13615                         err |= !!ret;
13616                         goto err_out;
13617                 }
13618                 if (ret < 0) {
13619                         warning(
13620 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13621                                 argv[optind]);
13622                 } else if (ret) {
13623                         warning(
13624                         "filesystem mounted, continuing because of --force");
13625                 }
13626                 /* A block device is mounted in exclusive mode by kernel */
13627                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13628         }
13629
13630         /* only allow partial opening under repair mode */
13631         if (repair)
13632                 ctree_flags |= OPEN_CTREE_PARTIAL;
13633
13634         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13635                                   chunk_root_bytenr, ctree_flags);
13636         if (!info) {
13637                 error("cannot open file system");
13638                 ret = -EIO;
13639                 err |= !!ret;
13640                 goto err_out;
13641         }
13642
13643         global_info = info;
13644         root = info->fs_root;
13645         uuid_unparse(info->super_copy->fsid, uuidbuf);
13646
13647         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13648
13649         /*
13650          * Check the bare minimum before starting anything else that could rely
13651          * on it, namely the tree roots, any local consistency checks
13652          */
13653         if (!extent_buffer_uptodate(info->tree_root->node) ||
13654             !extent_buffer_uptodate(info->dev_root->node) ||
13655             !extent_buffer_uptodate(info->chunk_root->node)) {
13656                 error("critical roots corrupted, unable to check the filesystem");
13657                 err |= !!ret;
13658                 ret = -EIO;
13659                 goto close_out;
13660         }
13661
13662         if (clear_space_cache) {
13663                 ret = do_clear_free_space_cache(info, clear_space_cache);
13664                 err |= !!ret;
13665                 goto close_out;
13666         }
13667
13668         /*
13669          * repair mode will force us to commit transaction which
13670          * will make us fail to load log tree when mounting.
13671          */
13672         if (repair && btrfs_super_log_root(info->super_copy)) {
13673                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13674                 if (!ret) {
13675                         ret = 1;
13676                         err |= !!ret;
13677                         goto close_out;
13678                 }
13679                 ret = zero_log_tree(root);
13680                 err |= !!ret;
13681                 if (ret) {
13682                         error("failed to zero log tree: %d", ret);
13683                         goto close_out;
13684                 }
13685         }
13686
13687         if (qgroup_report) {
13688                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13689                        uuidbuf);
13690                 ret = qgroup_verify_all(info);
13691                 err |= !!ret;
13692                 if (ret == 0)
13693                         report_qgroups(1);
13694                 goto close_out;
13695         }
13696         if (subvolid) {
13697                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13698                        subvolid, argv[optind], uuidbuf);
13699                 ret = print_extent_state(info, subvolid);
13700                 err |= !!ret;
13701                 goto close_out;
13702         }
13703
13704         if (init_extent_tree || init_csum_tree) {
13705                 struct btrfs_trans_handle *trans;
13706
13707                 trans = btrfs_start_transaction(info->extent_root, 0);
13708                 if (IS_ERR(trans)) {
13709                         error("error starting transaction");
13710                         ret = PTR_ERR(trans);
13711                         err |= !!ret;
13712                         goto close_out;
13713                 }
13714
13715                 if (init_extent_tree) {
13716                         printf("Creating a new extent tree\n");
13717                         ret = reinit_extent_tree(trans, info);
13718                         err |= !!ret;
13719                         if (ret)
13720                                 goto close_out;
13721                 }
13722
13723                 if (init_csum_tree) {
13724                         printf("Reinitialize checksum tree\n");
13725                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13726                         if (ret) {
13727                                 error("checksum tree initialization failed: %d",
13728                                                 ret);
13729                                 ret = -EIO;
13730                                 err |= !!ret;
13731                                 goto close_out;
13732                         }
13733
13734                         ret = fill_csum_tree(trans, info->csum_root,
13735                                              init_extent_tree);
13736                         err |= !!ret;
13737                         if (ret) {
13738                                 error("checksum tree refilling failed: %d", ret);
13739                                 return -EIO;
13740                         }
13741                 }
13742                 /*
13743                  * Ok now we commit and run the normal fsck, which will add
13744                  * extent entries for all of the items it finds.
13745                  */
13746                 ret = btrfs_commit_transaction(trans, info->extent_root);
13747                 err |= !!ret;
13748                 if (ret)
13749                         goto close_out;
13750         }
13751         if (!extent_buffer_uptodate(info->extent_root->node)) {
13752                 error("critical: extent_root, unable to check the filesystem");
13753                 ret = -EIO;
13754                 err |= !!ret;
13755                 goto close_out;
13756         }
13757         if (!extent_buffer_uptodate(info->csum_root->node)) {
13758                 error("critical: csum_root, unable to check the filesystem");
13759                 ret = -EIO;
13760                 err |= !!ret;
13761                 goto close_out;
13762         }
13763
13764         ret = do_check_chunks_and_extents(info);
13765         err |= !!ret;
13766         if (ret)
13767                 error(
13768                 "errors found in extent allocation tree or chunk allocation");
13769
13770         ret = repair_root_items(info);
13771         err |= !!ret;
13772         if (ret < 0) {
13773                 error("failed to repair root items: %s", strerror(-ret));
13774                 goto close_out;
13775         }
13776         if (repair) {
13777                 fprintf(stderr, "Fixed %d roots.\n", ret);
13778                 ret = 0;
13779         } else if (ret > 0) {
13780                 fprintf(stderr,
13781                        "Found %d roots with an outdated root item.\n",
13782                        ret);
13783                 fprintf(stderr,
13784                         "Please run a filesystem check with the option --repair to fix them.\n");
13785                 ret = 1;
13786                 err |= !!ret;
13787                 goto close_out;
13788         }
13789
13790         if (!ctx.progress_enabled) {
13791                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13792                         fprintf(stderr, "checking free space tree\n");
13793                 else
13794                         fprintf(stderr, "checking free space cache\n");
13795         }
13796         ret = check_space_cache(root);
13797         err |= !!ret;
13798         if (ret) {
13799                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13800                         error("errors found in free space tree");
13801                 else
13802                         error("errors found in free space cache");
13803                 goto out;
13804         }
13805
13806         /*
13807          * We used to have to have these hole extents in between our real
13808          * extents so if we don't have this flag set we need to make sure there
13809          * are no gaps in the file extents for inodes, otherwise we can just
13810          * ignore it when this happens.
13811          */
13812         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13813         ret = do_check_fs_roots(info, &root_cache);
13814         err |= !!ret;
13815         if (ret) {
13816                 error("errors found in fs roots");
13817                 goto out;
13818         }
13819
13820         fprintf(stderr, "checking csums\n");
13821         ret = check_csums(root);
13822         err |= !!ret;
13823         if (ret) {
13824                 error("errors found in csum tree");
13825                 goto out;
13826         }
13827
13828         fprintf(stderr, "checking root refs\n");
13829         /* For low memory mode, check_fs_roots_v2 handles root refs */
13830         if (check_mode != CHECK_MODE_LOWMEM) {
13831                 ret = check_root_refs(root, &root_cache);
13832                 err |= !!ret;
13833                 if (ret) {
13834                         error("errors found in root refs");
13835                         goto out;
13836                 }
13837         }
13838
13839         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13840                 struct extent_buffer *eb;
13841
13842                 eb = list_first_entry(&root->fs_info->recow_ebs,
13843                                       struct extent_buffer, recow);
13844                 list_del_init(&eb->recow);
13845                 ret = recow_extent_buffer(root, eb);
13846                 err |= !!ret;
13847                 if (ret) {
13848                         error("fails to fix transid errors");
13849                         break;
13850                 }
13851         }
13852
13853         while (!list_empty(&delete_items)) {
13854                 struct bad_item *bad;
13855
13856                 bad = list_first_entry(&delete_items, struct bad_item, list);
13857                 list_del_init(&bad->list);
13858                 if (repair) {
13859                         ret = delete_bad_item(root, bad);
13860                         err |= !!ret;
13861                 }
13862                 free(bad);
13863         }
13864
13865         if (info->quota_enabled) {
13866                 fprintf(stderr, "checking quota groups\n");
13867                 ret = qgroup_verify_all(info);
13868                 err |= !!ret;
13869                 if (ret) {
13870                         error("failed to check quota groups");
13871                         goto out;
13872                 }
13873                 report_qgroups(0);
13874                 ret = repair_qgroups(info, &qgroups_repaired);
13875                 err |= !!ret;
13876                 if (err) {
13877                         error("failed to repair quota groups");
13878                         goto out;
13879                 }
13880                 ret = 0;
13881         }
13882
13883         if (!list_empty(&root->fs_info->recow_ebs)) {
13884                 error("transid errors in file system");
13885                 ret = 1;
13886                 err |= !!ret;
13887         }
13888 out:
13889         printf("found %llu bytes used, ",
13890                (unsigned long long)bytes_used);
13891         if (err)
13892                 printf("error(s) found\n");
13893         else
13894                 printf("no error found\n");
13895         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13896         printf("total tree bytes: %llu\n",
13897                (unsigned long long)total_btree_bytes);
13898         printf("total fs tree bytes: %llu\n",
13899                (unsigned long long)total_fs_tree_bytes);
13900         printf("total extent tree bytes: %llu\n",
13901                (unsigned long long)total_extent_tree_bytes);
13902         printf("btree space waste bytes: %llu\n",
13903                (unsigned long long)btree_space_waste);
13904         printf("file data blocks allocated: %llu\n referenced %llu\n",
13905                 (unsigned long long)data_bytes_allocated,
13906                 (unsigned long long)data_bytes_referenced);
13907
13908         free_qgroup_counts();
13909         free_root_recs_tree(&root_cache);
13910 close_out:
13911         close_ctree(root);
13912 err_out:
13913         if (ctx.progress_enabled)
13914                 task_deinit(ctx.info);
13915
13916         return err;
13917 }