btrfs-progs: check: introduce repair_inode_item_missing()
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases of the check that can show a progress indicator.
 *
 * The values are used as an index into task_position_string[] in
 * print_status_check(), so the order here must match the strings there.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE = 1,
	TASK_FS_ROOTS = 2,
	/* Has to be the last element; it has no progress string */
	TASK_NOTHING = 3,
};
53
/* State handed to the progress callbacks, see print_status_check(). */
struct task_ctx {
	int progress_enabled;
	/* Phase whose name print_status_check() prints */
	enum task_position tp;

	/* Handle passed to task_period_start() and task_period_wait() */
	struct task_info *info;
};
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/* Check implementations that parse_check_mode() can select by name. */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM = 1,
	/* What parse_check_mode() returns for a name it does not know */
	CHECK_MODE_UNKNOWN = 2,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL,
};

/* Mode currently in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header embedded as the first member of struct data_backref and
 * struct tree_backref.  is_data tells the two apart, see
 * compare_extent_backref().
 */
struct extent_backref {
	/* rb-tree linkage, mapped back by rb_node_to_extent_backref() */
	struct rb_node node;
	unsigned int is_data:1;
	unsigned int found_extent_tree:1;
	unsigned int full_backref:1;
	unsigned int found_ref:1;
	unsigned int broken:1;
};
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
/*
 * Back reference to a data extent.  Ordered by compare_data_backref().
 */
struct data_backref {
	struct extent_backref node;
	/*
	 * parent and root share storage; compare_data_backref() compares
	 * them through 'parent', which covers both.
	 */
	union {
		u64 parent;
		u64 root;
	};
	u64 owner;
	u64 offset;
	u64 disk_bytenr;
	u64 bytes;
	u64 ram_bytes;
	u32 num_refs;
	/* Separate from the one-bit node.found_ref flag in the header */
	u32 found_ref;
};
115
/* Error bits; bit 0 is not used by this set. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
#define DIR_INDEX_MISSING	(1 << 18)	/* DIR_INDEX not found */
#define DIR_INDEX_MISMATCH	(1 << 19)	/* DIR_INDEX found but not match */
135
136 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 {
138         return container_of(back, struct data_backref, node);
139 }
140
141 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
142 {
143         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
144         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
145         struct data_backref *back1 = to_data_backref(ext1);
146         struct data_backref *back2 = to_data_backref(ext2);
147
148         WARN_ON(!ext1->is_data);
149         WARN_ON(!ext2->is_data);
150
151         /* parent and root are a union, so this covers both */
152         if (back1->parent > back2->parent)
153                 return 1;
154         if (back1->parent < back2->parent)
155                 return -1;
156
157         /* This is a full backref and the parents match. */
158         if (back1->node.full_backref)
159                 return 0;
160
161         if (back1->owner > back2->owner)
162                 return 1;
163         if (back1->owner < back2->owner)
164                 return -1;
165
166         if (back1->offset > back2->offset)
167                 return 1;
168         if (back1->offset < back2->offset)
169                 return -1;
170
171         if (back1->found_ref && back2->found_ref) {
172                 if (back1->disk_bytenr > back2->disk_bytenr)
173                         return 1;
174                 if (back1->disk_bytenr < back2->disk_bytenr)
175                         return -1;
176
177                 if (back1->bytes > back2->bytes)
178                         return 1;
179                 if (back1->bytes < back2->bytes)
180                         return -1;
181         }
182
183         return 0;
184 }
185
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head instead of an rb_node.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_record::orphan_extents.
 */
struct orphan_data_extent {
	struct list_head list;
	u64 root;
	u64 objectid;
	u64 offset;
	u64 disk_bytenr;
	u64 disk_len;
};
200
/*
 * Back reference to a tree block.  Ordered by compare_tree_backref().
 */
struct tree_backref {
	struct extent_backref node;
	/* parent and root share storage; comparisons go through 'parent' */
	union {
		u64 parent;
		u64 root;
	};
};
208
209 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
210 {
211         return container_of(back, struct tree_backref, node);
212 }
213
214 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
215 {
216         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
217         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
218         struct tree_backref *back1 = to_tree_backref(ext1);
219         struct tree_backref *back2 = to_tree_backref(ext2);
220
221         WARN_ON(ext1->is_data);
222         WARN_ON(ext2->is_data);
223
224         /* parent and root are a union, so this covers both */
225         if (back1->parent > back2->parent)
226                 return 1;
227         if (back1->parent < back2->parent)
228                 return -1;
229
230         return 0;
231 }
232
233 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
234 {
235         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
236         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
237
238         if (ext1->is_data > ext2->is_data)
239                 return 1;
240
241         if (ext1->is_data < ext2->is_data)
242                 return -1;
243
244         if (ext1->full_backref > ext2->full_backref)
245                 return 1;
246         if (ext1->full_backref < ext2->full_backref)
247                 return -1;
248
249         if (ext1->is_data)
250                 return compare_data_backref(node1, node2);
251         else
252                 return compare_tree_backref(node1, node2);
253 }
254
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That field is two bits wide, so it can hold this value.
 */
enum { FLAG_UNSET = 2 };
257
/* Bookkeeping kept for one extent. */
struct extent_record {
	struct list_head backrefs;
	struct list_head dups;
	struct rb_root backref_tree;
	/* List linkage, mapped back by to_extent_record() */
	struct list_head list;
	struct cache_extent cache;
	struct btrfs_disk_key parent_key;
	u64 start;
	u64 max_size;
	u64 nr;
	u64 refs;
	u64 extent_item_refs;
	u64 generation;
	u64 parent_generation;
	u64 info_objectid;
	u32 num_duplicates;
	u8 info_level;
	/* Two bits wide so that it can also hold FLAG_UNSET */
	unsigned int flag_block_full_backref:2;
	unsigned int found_rec:1;
	unsigned int content_checked:1;
	unsigned int owner_ref_checked:1;
	unsigned int is_root:1;
	unsigned int metadata:1;
	unsigned int bad_full_backref:1;
	unsigned int crossing_stripes:1;
	unsigned int wrong_chunk_type:1;
};
285
286 static inline struct extent_record* to_extent_record(struct list_head *entry)
287 {
288         return container_of(entry, struct extent_record, list);
289 }
290
291 struct inode_backref {
292         struct list_head list;
293         unsigned int found_dir_item:1;
294         unsigned int found_dir_index:1;
295         unsigned int found_inode_ref:1;
296         u8 filetype;
297         u8 ref_type;
298         int errors;
299         u64 dir;
300         u64 index;
301         u16 namelen;
302         char name[0];
303 };
304
305 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
306 {
307         return list_entry(entry, struct inode_backref, list);
308 }
309
/* Values recorded for one root item; linkable through 'list'. */
struct root_item_record {
	struct list_head list;
	u64 objectid;
	u64 bytenr;
	u64 last_snapshot;
	u8 level;
	u8 drop_level;
	struct btrfs_key drop_key;
};
319
/* Reference error bits, turned into text by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
333
/*
 * A hole covering [start, start + len), kept in an rb-tree ordered by
 * compare_hole().
 */
struct file_extent_hole {
	struct rb_node node;
	u64 start;
	u64 len;
};
339
/*
 * State collected for one inode.  Copied by clone_inode_rec() and reported
 * by print_inode_error().
 */
struct inode_record {
	/* List of struct inode_backref */
	struct list_head backrefs;
	unsigned int checked:1;
	unsigned int merging:1;
	unsigned int found_inode_item:1;
	unsigned int found_dir_item:1;
	unsigned int found_file_extent:1;
	unsigned int found_csum_item:1;
	unsigned int some_csum_missing:1;
	unsigned int nodatasum:1;
	/* Mask of I_ERR_* bits */
	int errors;

	u64 ino;
	u32 nlink;
	u32 imode;
	u64 isize;
	u64 nbytes;

	u32 found_link;
	u64 found_size;
	u64 extent_start;
	u64 extent_end;
	/* rb-tree of struct file_extent_hole */
	struct rb_root holes;
	/* List of struct orphan_data_extent */
	struct list_head orphan_extents;

	/* clone_inode_rec() sets this to 1 on the copy it returns */
	u32 refs;
};
367
/* Inode error bits, turned into text by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
383
384 struct root_backref {
385         struct list_head list;
386         unsigned int found_dir_item:1;
387         unsigned int found_dir_index:1;
388         unsigned int found_back_ref:1;
389         unsigned int found_forward_ref:1;
390         unsigned int reachable:1;
391         int errors;
392         u64 ref_root;
393         u64 dir;
394         u64 index;
395         u16 namelen;
396         char name[0];
397 };
398
399 static inline struct root_backref* to_root_backref(struct list_head *entry)
400 {
401         return list_entry(entry, struct root_backref, list);
402 }
403
/* State collected for one root. */
struct root_record {
	/* presumably a list of struct root_backref -- confirm at the users */
	struct list_head backrefs;
	struct cache_extent cache;
	unsigned int found_root_item:1;
	u64 objectid;
	u32 found_ref;
};
411
/* A cache_extent that carries an opaque data pointer. */
struct ptr_node {
	struct cache_extent cache;
	void *data;
};
416
/* Element type of walk_control::nodes[]. */
struct shared_node {
	struct cache_extent cache;
	struct cache_tree root_cache;
	struct cache_tree inode_cache;
	struct inode_record *current;
	u32 refs;
};
424
/* Start and size of one block. */
struct block_info {
	u64 start;
	u32 size;
};
429
/* State of a tree walk, with one shared_node slot per tree level. */
struct walk_control {
	struct cache_tree shared;
	struct shared_node *nodes[BTRFS_MAX_LEVEL];
	int active_node;
	int root_level;
};
436
/* An item identified by its key and root id; linkable through 'list'. */
struct bad_item {
	struct btrfs_key key;
	u64 root_id;
	struct list_head list;
};
442
/* A bytenr/bytes pair with counters; linkable through 'list'. */
struct extent_entry {
	u64 bytenr;
	u64 bytes;
	int count;
	int broken;
	struct list_head list;
};
450
/* Information recorded about one root. */
struct root_item_info {
	/* level of the root */
	u8 level;
	/* number of nodes at this level, must be 1 for a root */
	int node_count;
	u64 bytenr;
	u64 gen;
	struct cache_extent cache_extent;
};
460
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag owns a distinct bit.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two errors
 * indistinguishable once OR-ed into the same mask; it now has its own bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
477
478 static void *print_status_check(void *p)
479 {
480         struct task_ctx *priv = p;
481         const char work_indicator[] = { '.', 'o', 'O', 'o' };
482         uint32_t count = 0;
483         static char *task_position_string[] = {
484                 "checking extents",
485                 "checking free space cache",
486                 "checking fs roots",
487         };
488
489         task_period_start(priv->info, 1000 /* 1s */);
490
491         if (priv->tp == TASK_NOTHING)
492                 return NULL;
493
494         while (1) {
495                 printf("%s [%c]\r", task_position_string[priv->tp],
496                                 work_indicator[count % 4]);
497                 count++;
498                 fflush(stdout);
499                 task_period_wait(priv->info);
500         }
501         return NULL;
502 }
503
/*
 * Progress callback: end the progress line with a newline and flush it.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputc('\n', stdout);
	fflush(stdout);

	return 0;
}
511
512 static enum btrfs_check_mode parse_check_mode(const char *str)
513 {
514         if (strcmp(str, "lowmem") == 0)
515                 return CHECK_MODE_LOWMEM;
516         if (strcmp(str, "orig") == 0)
517                 return CHECK_MODE_ORIGINAL;
518         if (strcmp(str, "original") == 0)
519                 return CHECK_MODE_ORIGINAL;
520
521         return CHECK_MODE_UNKNOWN;
522 }
523
524 /* Compatible function to allow reuse of old codes */
525 static u64 first_extent_gap(struct rb_root *holes)
526 {
527         struct file_extent_hole *hole;
528
529         if (RB_EMPTY_ROOT(holes))
530                 return (u64)-1;
531
532         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
533         return hole->start;
534 }
535
536 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
537 {
538         struct file_extent_hole *hole1;
539         struct file_extent_hole *hole2;
540
541         hole1 = rb_entry(node1, struct file_extent_hole, node);
542         hole2 = rb_entry(node2, struct file_extent_hole, node);
543
544         if (hole1->start > hole2->start)
545                 return -1;
546         if (hole1->start < hole2->start)
547                 return 1;
548         /* Now hole1->start == hole2->start */
549         if (hole1->len >= hole2->len)
550                 /*
551                  * Hole 1 will be merge center
552                  * Same hole will be merged later
553                  */
554                 return -1;
555         /* Hole 2 will be merge center */
556         return 1;
557 }
558
559 /*
560  * Add a hole to the record
561  *
562  * This will do hole merge for copy_file_extent_holes(),
563  * which will ensure there won't be continuous holes.
564  */
565 static int add_file_extent_hole(struct rb_root *holes,
566                                 u64 start, u64 len)
567 {
568         struct file_extent_hole *hole;
569         struct file_extent_hole *prev = NULL;
570         struct file_extent_hole *next = NULL;
571
572         hole = malloc(sizeof(*hole));
573         if (!hole)
574                 return -ENOMEM;
575         hole->start = start;
576         hole->len = len;
577         /* Since compare will not return 0, no -EEXIST will happen */
578         rb_insert(holes, &hole->node, compare_hole);
579
580         /* simple merge with previous hole */
581         if (rb_prev(&hole->node))
582                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
583                                 node);
584         if (prev && prev->start + prev->len >= hole->start) {
585                 hole->len = hole->start + hole->len - prev->start;
586                 hole->start = prev->start;
587                 rb_erase(&prev->node, holes);
588                 free(prev);
589                 prev = NULL;
590         }
591
592         /* iterate merge with next holes */
593         while (1) {
594                 if (!rb_next(&hole->node))
595                         break;
596                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
597                                         node);
598                 if (hole->start + hole->len >= next->start) {
599                         if (hole->start + hole->len <= next->start + next->len)
600                                 hole->len = next->start + next->len -
601                                             hole->start;
602                         rb_erase(&next->node, holes);
603                         free(next);
604                         next = NULL;
605                 } else
606                         break;
607         }
608         return 0;
609 }
610
611 static int compare_hole_range(struct rb_node *node, void *data)
612 {
613         struct file_extent_hole *hole;
614         u64 start;
615
616         hole = (struct file_extent_hole *)data;
617         start = hole->start;
618
619         hole = rb_entry(node, struct file_extent_hole, node);
620         if (start < hole->start)
621                 return -1;
622         if (start >= hole->start && start < hole->start + hole->len)
623                 return 0;
624         return 1;
625 }
626
627 /*
628  * Delete a hole in the record
629  *
630  * This will do the hole split and is much restrict than add.
631  */
632 static int del_file_extent_hole(struct rb_root *holes,
633                                 u64 start, u64 len)
634 {
635         struct file_extent_hole *hole;
636         struct file_extent_hole tmp;
637         u64 prev_start = 0;
638         u64 prev_len = 0;
639         u64 next_start = 0;
640         u64 next_len = 0;
641         struct rb_node *node;
642         int have_prev = 0;
643         int have_next = 0;
644         int ret = 0;
645
646         tmp.start = start;
647         tmp.len = len;
648         node = rb_search(holes, &tmp, compare_hole_range, NULL);
649         if (!node)
650                 return -EEXIST;
651         hole = rb_entry(node, struct file_extent_hole, node);
652         if (start + len > hole->start + hole->len)
653                 return -EEXIST;
654
655         /*
656          * Now there will be no overlap, delete the hole and re-add the
657          * split(s) if they exists.
658          */
659         if (start > hole->start) {
660                 prev_start = hole->start;
661                 prev_len = start - hole->start;
662                 have_prev = 1;
663         }
664         if (hole->start + hole->len > start + len) {
665                 next_start = start + len;
666                 next_len = hole->start + hole->len - start - len;
667                 have_next = 1;
668         }
669         rb_erase(node, holes);
670         free(hole);
671         if (have_prev) {
672                 ret = add_file_extent_hole(holes, prev_start, prev_len);
673                 if (ret < 0)
674                         return ret;
675         }
676         if (have_next) {
677                 ret = add_file_extent_hole(holes, next_start, next_len);
678                 if (ret < 0)
679                         return ret;
680         }
681         return 0;
682 }
683
684 static int copy_file_extent_holes(struct rb_root *dst,
685                                   struct rb_root *src)
686 {
687         struct file_extent_hole *hole;
688         struct rb_node *node;
689         int ret = 0;
690
691         node = rb_first(src);
692         while (node) {
693                 hole = rb_entry(node, struct file_extent_hole, node);
694                 ret = add_file_extent_hole(dst, hole->start, hole->len);
695                 if (ret)
696                         break;
697                 node = rb_next(node);
698         }
699         return ret;
700 }
701
702 static void free_file_extent_holes(struct rb_root *holes)
703 {
704         struct rb_node *node;
705         struct file_extent_hole *hole;
706
707         node = rb_first(holes);
708         while (node) {
709                 hole = rb_entry(node, struct file_extent_hole, node);
710                 rb_erase(node, holes);
711                 free(hole);
712                 node = rb_first(holes);
713         }
714 }
715
/* Forward declaration; being static, the function is defined in this file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
717
718 static void record_root_in_trans(struct btrfs_trans_handle *trans,
719                                  struct btrfs_root *root)
720 {
721         if (root->last_trans != trans->transid) {
722                 root->track_dirty = 1;
723                 root->last_trans = trans->transid;
724                 root->commit_root = root->node;
725                 extent_buffer_get(root->node);
726         }
727 }
728
729 static u8 imode_to_type(u32 imode)
730 {
731 #define S_SHIFT 12
732         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
733                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
734                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
735                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
736                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
737                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
738                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
739                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
740         };
741
742         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
743 #undef S_SHIFT
744 }
745
746 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
747 {
748         struct device_record *rec1;
749         struct device_record *rec2;
750
751         rec1 = rb_entry(node1, struct device_record, node);
752         rec2 = rb_entry(node2, struct device_record, node);
753         if (rec1->devid > rec2->devid)
754                 return -1;
755         else if (rec1->devid < rec2->devid)
756                 return 1;
757         else
758                 return 0;
759 }
760
761 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
762 {
763         struct inode_record *rec;
764         struct inode_backref *backref;
765         struct inode_backref *orig;
766         struct inode_backref *tmp;
767         struct orphan_data_extent *src_orphan;
768         struct orphan_data_extent *dst_orphan;
769         struct rb_node *rb;
770         size_t size;
771         int ret;
772
773         rec = malloc(sizeof(*rec));
774         if (!rec)
775                 return ERR_PTR(-ENOMEM);
776         memcpy(rec, orig_rec, sizeof(*rec));
777         rec->refs = 1;
778         INIT_LIST_HEAD(&rec->backrefs);
779         INIT_LIST_HEAD(&rec->orphan_extents);
780         rec->holes = RB_ROOT;
781
782         list_for_each_entry(orig, &orig_rec->backrefs, list) {
783                 size = sizeof(*orig) + orig->namelen + 1;
784                 backref = malloc(size);
785                 if (!backref) {
786                         ret = -ENOMEM;
787                         goto cleanup;
788                 }
789                 memcpy(backref, orig, size);
790                 list_add_tail(&backref->list, &rec->backrefs);
791         }
792         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
793                 dst_orphan = malloc(sizeof(*dst_orphan));
794                 if (!dst_orphan) {
795                         ret = -ENOMEM;
796                         goto cleanup;
797                 }
798                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
799                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
800         }
801         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
802         if (ret < 0)
803                 goto cleanup_rb;
804
805         return rec;
806
807 cleanup_rb:
808         rb = rb_first(&rec->holes);
809         while (rb) {
810                 struct file_extent_hole *hole;
811
812                 hole = rb_entry(rb, struct file_extent_hole, node);
813                 rb = rb_next(rb);
814                 free(hole);
815         }
816
817 cleanup:
818         if (!list_empty(&rec->backrefs))
819                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
820                         list_del(&orig->list);
821                         free(orig);
822                 }
823
824         if (!list_empty(&rec->orphan_extents))
825                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
826                         list_del(&orig->list);
827                         free(orig);
828                 }
829
830         free(rec);
831
832         return ERR_PTR(ret);
833 }
834
835 static void print_orphan_data_extents(struct list_head *orphan_extents,
836                                       u64 objectid)
837 {
838         struct orphan_data_extent *orphan;
839
840         if (list_empty(orphan_extents))
841                 return;
842         printf("The following data extent is lost in tree %llu:\n",
843                objectid);
844         list_for_each_entry(orphan, orphan_extents, list) {
845                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
846                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
847                        orphan->disk_len);
848         }
849 }
850
851 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
852 {
853         u64 root_objectid = root->root_key.objectid;
854         int errors = rec->errors;
855
856         if (!errors)
857                 return;
858         /* reloc root errors, we print its corresponding fs root objectid*/
859         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
860                 root_objectid = root->root_key.offset;
861                 fprintf(stderr, "reloc");
862         }
863         fprintf(stderr, "root %llu inode %llu errors %x",
864                 (unsigned long long) root_objectid,
865                 (unsigned long long) rec->ino, rec->errors);
866
867         if (errors & I_ERR_NO_INODE_ITEM)
868                 fprintf(stderr, ", no inode item");
869         if (errors & I_ERR_NO_ORPHAN_ITEM)
870                 fprintf(stderr, ", no orphan item");
871         if (errors & I_ERR_DUP_INODE_ITEM)
872                 fprintf(stderr, ", dup inode item");
873         if (errors & I_ERR_DUP_DIR_INDEX)
874                 fprintf(stderr, ", dup dir index");
875         if (errors & I_ERR_ODD_DIR_ITEM)
876                 fprintf(stderr, ", odd dir item");
877         if (errors & I_ERR_ODD_FILE_EXTENT)
878                 fprintf(stderr, ", odd file extent");
879         if (errors & I_ERR_BAD_FILE_EXTENT)
880                 fprintf(stderr, ", bad file extent");
881         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
882                 fprintf(stderr, ", file extent overlap");
883         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
884                 fprintf(stderr, ", file extent discount");
885         if (errors & I_ERR_DIR_ISIZE_WRONG)
886                 fprintf(stderr, ", dir isize wrong");
887         if (errors & I_ERR_FILE_NBYTES_WRONG)
888                 fprintf(stderr, ", nbytes wrong");
889         if (errors & I_ERR_ODD_CSUM_ITEM)
890                 fprintf(stderr, ", odd csum item");
891         if (errors & I_ERR_SOME_CSUM_MISSING)
892                 fprintf(stderr, ", some csum missing");
893         if (errors & I_ERR_LINK_COUNT_WRONG)
894                 fprintf(stderr, ", link count wrong");
895         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
896                 fprintf(stderr, ", orphan file extent");
897         fprintf(stderr, "\n");
898         /* Print the orphan extents if needed */
899         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
900                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
901
902         /* Print the holes if needed */
903         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
904                 struct file_extent_hole *hole;
905                 struct rb_node *node;
906                 int found = 0;
907
908                 node = rb_first(&rec->holes);
909                 fprintf(stderr, "Found file extent holes:\n");
910                 while (node) {
911                         found = 1;
912                         hole = rb_entry(node, struct file_extent_hole, node);
913                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
914                                 hole->start, hole->len);
915                         node = rb_next(node);
916                 }
917                 if (!found)
918                         fprintf(stderr, "\tstart: 0, len: %llu\n",
919                                 round_up(rec->isize,
920                                          root->fs_info->sectorsize));
921         }
922 }
923
924 static void print_ref_error(int errors)
925 {
926         if (errors & REF_ERR_NO_DIR_ITEM)
927                 fprintf(stderr, ", no dir item");
928         if (errors & REF_ERR_NO_DIR_INDEX)
929                 fprintf(stderr, ", no dir index");
930         if (errors & REF_ERR_NO_INODE_REF)
931                 fprintf(stderr, ", no inode ref");
932         if (errors & REF_ERR_DUP_DIR_ITEM)
933                 fprintf(stderr, ", dup dir item");
934         if (errors & REF_ERR_DUP_DIR_INDEX)
935                 fprintf(stderr, ", dup dir index");
936         if (errors & REF_ERR_DUP_INODE_REF)
937                 fprintf(stderr, ", dup inode ref");
938         if (errors & REF_ERR_INDEX_UNMATCH)
939                 fprintf(stderr, ", index mismatch");
940         if (errors & REF_ERR_FILETYPE_UNMATCH)
941                 fprintf(stderr, ", filetype mismatch");
942         if (errors & REF_ERR_NAME_TOO_LONG)
943                 fprintf(stderr, ", name too long");
944         if (errors & REF_ERR_NO_ROOT_REF)
945                 fprintf(stderr, ", no root ref");
946         if (errors & REF_ERR_NO_ROOT_BACKREF)
947                 fprintf(stderr, ", no root backref");
948         if (errors & REF_ERR_DUP_ROOT_REF)
949                 fprintf(stderr, ", dup root ref");
950         if (errors & REF_ERR_DUP_ROOT_BACKREF)
951                 fprintf(stderr, ", dup root backref");
952         fprintf(stderr, "\n");
953 }
954
955 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
956                                           u64 ino, int mod)
957 {
958         struct ptr_node *node;
959         struct cache_extent *cache;
960         struct inode_record *rec = NULL;
961         int ret;
962
963         cache = lookup_cache_extent(inode_cache, ino, 1);
964         if (cache) {
965                 node = container_of(cache, struct ptr_node, cache);
966                 rec = node->data;
967                 if (mod && rec->refs > 1) {
968                         node->data = clone_inode_rec(rec);
969                         if (IS_ERR(node->data))
970                                 return node->data;
971                         rec->refs--;
972                         rec = node->data;
973                 }
974         } else if (mod) {
975                 rec = calloc(1, sizeof(*rec));
976                 if (!rec)
977                         return ERR_PTR(-ENOMEM);
978                 rec->ino = ino;
979                 rec->extent_start = (u64)-1;
980                 rec->refs = 1;
981                 INIT_LIST_HEAD(&rec->backrefs);
982                 INIT_LIST_HEAD(&rec->orphan_extents);
983                 rec->holes = RB_ROOT;
984
985                 node = malloc(sizeof(*node));
986                 if (!node) {
987                         free(rec);
988                         return ERR_PTR(-ENOMEM);
989                 }
990                 node->cache.start = ino;
991                 node->cache.size = 1;
992                 node->data = rec;
993
994                 if (ino == BTRFS_FREE_INO_OBJECTID)
995                         rec->found_link = 1;
996
997                 ret = insert_cache_extent(inode_cache, &node->cache);
998                 if (ret)
999                         return ERR_PTR(-EEXIST);
1000         }
1001         return rec;
1002 }
1003
1004 static void free_orphan_data_extents(struct list_head *orphan_extents)
1005 {
1006         struct orphan_data_extent *orphan;
1007
1008         while (!list_empty(orphan_extents)) {
1009                 orphan = list_entry(orphan_extents->next,
1010                                     struct orphan_data_extent, list);
1011                 list_del(&orphan->list);
1012                 free(orphan);
1013         }
1014 }
1015
1016 static void free_inode_rec(struct inode_record *rec)
1017 {
1018         struct inode_backref *backref;
1019
1020         if (--rec->refs > 0)
1021                 return;
1022
1023         while (!list_empty(&rec->backrefs)) {
1024                 backref = to_inode_backref(rec->backrefs.next);
1025                 list_del(&backref->list);
1026                 free(backref);
1027         }
1028         free_orphan_data_extents(&rec->orphan_extents);
1029         free_file_extent_holes(&rec->holes);
1030         free(rec);
1031 }
1032
1033 static int can_free_inode_rec(struct inode_record *rec)
1034 {
1035         if (!rec->errors && rec->checked && rec->found_inode_item &&
1036             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1037                 return 1;
1038         return 0;
1039 }
1040
1041 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1042                                  struct inode_record *rec)
1043 {
1044         struct cache_extent *cache;
1045         struct inode_backref *tmp, *backref;
1046         struct ptr_node *node;
1047         u8 filetype;
1048
1049         if (!rec->found_inode_item)
1050                 return;
1051
1052         filetype = imode_to_type(rec->imode);
1053         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1054                 if (backref->found_dir_item && backref->found_dir_index) {
1055                         if (backref->filetype != filetype)
1056                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1057                         if (!backref->errors && backref->found_inode_ref &&
1058                             rec->nlink == rec->found_link) {
1059                                 list_del(&backref->list);
1060                                 free(backref);
1061                         }
1062                 }
1063         }
1064
1065         if (!rec->checked || rec->merging)
1066                 return;
1067
1068         if (S_ISDIR(rec->imode)) {
1069                 if (rec->found_size != rec->isize)
1070                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1071                 if (rec->found_file_extent)
1072                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1073         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1074                 if (rec->found_dir_item)
1075                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1076                 if (rec->found_size != rec->nbytes)
1077                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1078                 if (rec->nlink > 0 && !no_holes &&
1079                     (rec->extent_end < rec->isize ||
1080                      first_extent_gap(&rec->holes) < rec->isize))
1081                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1082         }
1083
1084         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1085                 if (rec->found_csum_item && rec->nodatasum)
1086                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1087                 if (rec->some_csum_missing && !rec->nodatasum)
1088                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1089         }
1090
1091         BUG_ON(rec->refs != 1);
1092         if (can_free_inode_rec(rec)) {
1093                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1094                 node = container_of(cache, struct ptr_node, cache);
1095                 BUG_ON(node->data != rec);
1096                 remove_cache_extent(inode_cache, &node->cache);
1097                 free(node);
1098                 free_inode_rec(rec);
1099         }
1100 }
1101
1102 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1103 {
1104         struct btrfs_path path;
1105         struct btrfs_key key;
1106         int ret;
1107
1108         key.objectid = BTRFS_ORPHAN_OBJECTID;
1109         key.type = BTRFS_ORPHAN_ITEM_KEY;
1110         key.offset = ino;
1111
1112         btrfs_init_path(&path);
1113         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1114         btrfs_release_path(&path);
1115         if (ret > 0)
1116                 ret = -ENOENT;
1117         return ret;
1118 }
1119
1120 static int process_inode_item(struct extent_buffer *eb,
1121                               int slot, struct btrfs_key *key,
1122                               struct shared_node *active_node)
1123 {
1124         struct inode_record *rec;
1125         struct btrfs_inode_item *item;
1126
1127         rec = active_node->current;
1128         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1129         if (rec->found_inode_item) {
1130                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1131                 return 1;
1132         }
1133         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1134         rec->nlink = btrfs_inode_nlink(eb, item);
1135         rec->isize = btrfs_inode_size(eb, item);
1136         rec->nbytes = btrfs_inode_nbytes(eb, item);
1137         rec->imode = btrfs_inode_mode(eb, item);
1138         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1139                 rec->nodatasum = 1;
1140         rec->found_inode_item = 1;
1141         if (rec->nlink == 0)
1142                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1143         maybe_free_inode_rec(&active_node->inode_cache, rec);
1144         return 0;
1145 }
1146
1147 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1148                                                 const char *name,
1149                                                 int namelen, u64 dir)
1150 {
1151         struct inode_backref *backref;
1152
1153         list_for_each_entry(backref, &rec->backrefs, list) {
1154                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1155                         break;
1156                 if (backref->dir != dir || backref->namelen != namelen)
1157                         continue;
1158                 if (memcmp(name, backref->name, namelen))
1159                         continue;
1160                 return backref;
1161         }
1162
1163         backref = malloc(sizeof(*backref) + namelen + 1);
1164         if (!backref)
1165                 return NULL;
1166         memset(backref, 0, sizeof(*backref));
1167         backref->dir = dir;
1168         backref->namelen = namelen;
1169         memcpy(backref->name, name, namelen);
1170         backref->name[namelen] = '\0';
1171         list_add_tail(&backref->list, &rec->backrefs);
1172         return backref;
1173 }
1174
1175 static int add_inode_backref(struct cache_tree *inode_cache,
1176                              u64 ino, u64 dir, u64 index,
1177                              const char *name, int namelen,
1178                              u8 filetype, u8 itemtype, int errors)
1179 {
1180         struct inode_record *rec;
1181         struct inode_backref *backref;
1182
1183         rec = get_inode_rec(inode_cache, ino, 1);
1184         BUG_ON(IS_ERR(rec));
1185         backref = get_inode_backref(rec, name, namelen, dir);
1186         BUG_ON(!backref);
1187         if (errors)
1188                 backref->errors |= errors;
1189         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1190                 if (backref->found_dir_index)
1191                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1192                 if (backref->found_inode_ref && backref->index != index)
1193                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1194                 if (backref->found_dir_item && backref->filetype != filetype)
1195                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1196
1197                 backref->index = index;
1198                 backref->filetype = filetype;
1199                 backref->found_dir_index = 1;
1200         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1201                 rec->found_link++;
1202                 if (backref->found_dir_item)
1203                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1204                 if (backref->found_dir_index && backref->filetype != filetype)
1205                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1206
1207                 backref->filetype = filetype;
1208                 backref->found_dir_item = 1;
1209         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1210                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1211                 if (backref->found_inode_ref)
1212                         backref->errors |= REF_ERR_DUP_INODE_REF;
1213                 if (backref->found_dir_index && backref->index != index)
1214                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1215                 else
1216                         backref->index = index;
1217
1218                 backref->ref_type = itemtype;
1219                 backref->found_inode_ref = 1;
1220         } else {
1221                 BUG_ON(1);
1222         }
1223
1224         maybe_free_inode_rec(inode_cache, rec);
1225         return 0;
1226 }
1227
1228 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1229                             struct cache_tree *dst_cache)
1230 {
1231         struct inode_backref *backref;
1232         u32 dir_count = 0;
1233         int ret = 0;
1234
1235         dst->merging = 1;
1236         list_for_each_entry(backref, &src->backrefs, list) {
1237                 if (backref->found_dir_index) {
1238                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1239                                         backref->index, backref->name,
1240                                         backref->namelen, backref->filetype,
1241                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1242                 }
1243                 if (backref->found_dir_item) {
1244                         dir_count++;
1245                         add_inode_backref(dst_cache, dst->ino,
1246                                         backref->dir, 0, backref->name,
1247                                         backref->namelen, backref->filetype,
1248                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1249                 }
1250                 if (backref->found_inode_ref) {
1251                         add_inode_backref(dst_cache, dst->ino,
1252                                         backref->dir, backref->index,
1253                                         backref->name, backref->namelen, 0,
1254                                         backref->ref_type, backref->errors);
1255                 }
1256         }
1257
1258         if (src->found_dir_item)
1259                 dst->found_dir_item = 1;
1260         if (src->found_file_extent)
1261                 dst->found_file_extent = 1;
1262         if (src->found_csum_item)
1263                 dst->found_csum_item = 1;
1264         if (src->some_csum_missing)
1265                 dst->some_csum_missing = 1;
1266         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1267                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1268                 if (ret < 0)
1269                         return ret;
1270         }
1271
1272         BUG_ON(src->found_link < dir_count);
1273         dst->found_link += src->found_link - dir_count;
1274         dst->found_size += src->found_size;
1275         if (src->extent_start != (u64)-1) {
1276                 if (dst->extent_start == (u64)-1) {
1277                         dst->extent_start = src->extent_start;
1278                         dst->extent_end = src->extent_end;
1279                 } else {
1280                         if (dst->extent_end > src->extent_start)
1281                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1282                         else if (dst->extent_end < src->extent_start) {
1283                                 ret = add_file_extent_hole(&dst->holes,
1284                                         dst->extent_end,
1285                                         src->extent_start - dst->extent_end);
1286                         }
1287                         if (dst->extent_end < src->extent_end)
1288                                 dst->extent_end = src->extent_end;
1289                 }
1290         }
1291
1292         dst->errors |= src->errors;
1293         if (src->found_inode_item) {
1294                 if (!dst->found_inode_item) {
1295                         dst->nlink = src->nlink;
1296                         dst->isize = src->isize;
1297                         dst->nbytes = src->nbytes;
1298                         dst->imode = src->imode;
1299                         dst->nodatasum = src->nodatasum;
1300                         dst->found_inode_item = 1;
1301                 } else {
1302                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1303                 }
1304         }
1305         dst->merging = 0;
1306
1307         return 0;
1308 }
1309
/*
 * Move or share every inode record held by @src_node into @dst_node.
 *
 * One reference on @src_node is dropped first.  If that was the last one,
 * the ptr_nodes are moved ("spliced") out of the source trees; otherwise a
 * new ptr_node is allocated for each record and the record's refcount is
 * bumped so both shared nodes point at it.
 *
 * Both caches are processed in turn: root_cache first, then inode_cache.
 * When the destination already has a record for the same inode, the two are
 * merged via merge_inode_recs() and the source reference is released.
 *
 * Finally, if @src_node was working on a higher inode number than
 * @dst_node, the destination's current record is marked checked and the
 * destination switches to that inode.
 *
 * Always returns 0; allocation and lookup failures hit BUG_ON().
 */
static int splice_shared_node(struct shared_node *src_node,
                              struct shared_node *dst_node)
{
        struct cache_extent *cache;
        struct ptr_node *node, *ins;
        struct cache_tree *src, *dst;
        struct inode_record *rec, *conflict;
        u64 current_ino = 0;
        int splice = 0;
        int ret;

        /* Last reference gone: entries can be moved instead of copied. */
        if (--src_node->refs == 0)
                splice = 1;
        if (src_node->current)
                current_ino = src_node->current->ino;

        src = &src_node->root_cache;
        dst = &dst_node->root_cache;
again:
        cache = search_cache_extent(src, 0);
        while (cache) {
                node = container_of(cache, struct ptr_node, cache);
                rec = node->data;
                /* Advance before the node is possibly removed from src. */
                cache = next_cache_extent(cache);

                if (splice) {
                        remove_cache_extent(src, &node->cache);
                        ins = node;
                } else {
                        ins = malloc(sizeof(*ins));
                        BUG_ON(!ins);
                        ins->cache.start = node->cache.start;
                        ins->cache.size = node->cache.size;
                        ins->data = rec;
                        rec->refs++;
                }
                ret = insert_cache_extent(dst, &ins->cache);
                if (ret == -EEXIST) {
                        /* dst already tracks this inode: merge into it. */
                        conflict = get_inode_rec(dst, rec->ino, 1);
                        BUG_ON(IS_ERR(conflict));
                        merge_inode_recs(rec, conflict, dst);
                        if (rec->checked) {
                                conflict->checked = 1;
                                if (dst_node->current == conflict)
                                        dst_node->current = NULL;
                        }
                        maybe_free_inode_rec(dst, conflict);
                        /* Drop the reference that ins held; ins was not inserted. */
                        free_inode_rec(rec);
                        free(ins);
                } else {
                        BUG_ON(ret);
                }
        }

        /* Second pass handles the inode caches. */
        if (src == &src_node->root_cache) {
                src = &src_node->inode_cache;
                dst = &dst_node->inode_cache;
                goto again;
        }

        /* From here on dst refers to dst_node->inode_cache. */
        if (current_ino > 0 && (!dst_node->current ||
            current_ino > dst_node->current->ino)) {
                if (dst_node->current) {
                        dst_node->current->checked = 1;
                        maybe_free_inode_rec(dst, dst_node->current);
                }
                dst_node->current = get_inode_rec(dst, current_ino, 1);
                BUG_ON(IS_ERR(dst_node->current));
        }
        return 0;
}
1381
1382 static void free_inode_ptr(struct cache_extent *cache)
1383 {
1384         struct ptr_node *node;
1385         struct inode_record *rec;
1386
1387         node = container_of(cache, struct ptr_node, cache);
1388         rec = node->data;
1389         free_inode_rec(rec);
1390         free(node);
1391 }
1392
/*
 * Instantiate the cache-tree teardown helper for inode records, using
 * free_inode_ptr() as the per-entry destructor.
 * NOTE(review): presumably this expands to free_inode_recs_tree(), which is
 * called later in this file -- confirm against the macro definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1394
1395 static struct shared_node *find_shared_node(struct cache_tree *shared,
1396                                             u64 bytenr)
1397 {
1398         struct cache_extent *cache;
1399         struct shared_node *node;
1400
1401         cache = lookup_cache_extent(shared, bytenr, 1);
1402         if (cache) {
1403                 node = container_of(cache, struct shared_node, cache);
1404                 return node;
1405         }
1406         return NULL;
1407 }
1408
1409 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1410 {
1411         int ret;
1412         struct shared_node *node;
1413
1414         node = calloc(1, sizeof(*node));
1415         if (!node)
1416                 return -ENOMEM;
1417         node->cache.start = bytenr;
1418         node->cache.size = 1;
1419         cache_tree_init(&node->root_cache);
1420         cache_tree_init(&node->inode_cache);
1421         node->refs = refs;
1422
1423         ret = insert_cache_extent(shared, &node->cache);
1424
1425         return ret;
1426 }
1427
1428 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1429                              struct walk_control *wc, int level)
1430 {
1431         struct shared_node *node;
1432         struct shared_node *dest;
1433         int ret;
1434
1435         if (level == wc->active_node)
1436                 return 0;
1437
1438         BUG_ON(wc->active_node <= level);
1439         node = find_shared_node(&wc->shared, bytenr);
1440         if (!node) {
1441                 ret = add_shared_node(&wc->shared, bytenr, refs);
1442                 BUG_ON(ret);
1443                 node = find_shared_node(&wc->shared, bytenr);
1444                 wc->nodes[level] = node;
1445                 wc->active_node = level;
1446                 return 0;
1447         }
1448
1449         if (wc->root_level == wc->active_node &&
1450             btrfs_root_refs(&root->root_item) == 0) {
1451                 if (--node->refs == 0) {
1452                         free_inode_recs_tree(&node->root_cache);
1453                         free_inode_recs_tree(&node->inode_cache);
1454                         remove_cache_extent(&wc->shared, &node->cache);
1455                         free(node);
1456                 }
1457                 return 1;
1458         }
1459
1460         dest = wc->nodes[wc->active_node];
1461         splice_shared_node(node, dest);
1462         if (node->refs == 0) {
1463                 remove_cache_extent(&wc->shared, &node->cache);
1464                 free(node);
1465         }
1466         return 1;
1467 }
1468
1469 static int leave_shared_node(struct btrfs_root *root,
1470                              struct walk_control *wc, int level)
1471 {
1472         struct shared_node *node;
1473         struct shared_node *dest;
1474         int i;
1475
1476         if (level == wc->root_level)
1477                 return 0;
1478
1479         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1480                 if (wc->nodes[i])
1481                         break;
1482         }
1483         BUG_ON(i >= BTRFS_MAX_LEVEL);
1484
1485         node = wc->nodes[wc->active_node];
1486         wc->nodes[wc->active_node] = NULL;
1487         wc->active_node = i;
1488
1489         dest = wc->nodes[wc->active_node];
1490         if (wc->active_node < wc->root_level ||
1491             btrfs_root_refs(&root->root_item) > 0) {
1492                 BUG_ON(node->refs <= 1);
1493                 splice_shared_node(node, dest);
1494         } else {
1495                 BUG_ON(node->refs < 2);
1496                 node->refs--;
1497         }
1498         return 0;
1499 }
1500
1501 /*
1502  * Returns:
1503  * < 0 - on error
1504  * 1   - if the root with id child_root_id is a child of root parent_root_id
1505  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1506  *       has other root(s) as parent(s)
1507  * 2   - if the root child_root_id doesn't have any parent roots
1508  */
1509 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1510                          u64 child_root_id)
1511 {
1512         struct btrfs_path path;
1513         struct btrfs_key key;
1514         struct extent_buffer *leaf;
1515         int has_parent = 0;
1516         int ret;
1517
1518         btrfs_init_path(&path);
1519
1520         key.objectid = parent_root_id;
1521         key.type = BTRFS_ROOT_REF_KEY;
1522         key.offset = child_root_id;
1523         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1524                                 0, 0);
1525         if (ret < 0)
1526                 return ret;
1527         btrfs_release_path(&path);
1528         if (!ret)
1529                 return 1;
1530
1531         key.objectid = child_root_id;
1532         key.type = BTRFS_ROOT_BACKREF_KEY;
1533         key.offset = 0;
1534         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1535                                 0, 0);
1536         if (ret < 0)
1537                 goto out;
1538
1539         while (1) {
1540                 leaf = path.nodes[0];
1541                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1542                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1543                         if (ret)
1544                                 break;
1545                         leaf = path.nodes[0];
1546                 }
1547
1548                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1549                 if (key.objectid != child_root_id ||
1550                     key.type != BTRFS_ROOT_BACKREF_KEY)
1551                         break;
1552
1553                 has_parent = 1;
1554
1555                 if (key.offset == parent_root_id) {
1556                         btrfs_release_path(&path);
1557                         return 1;
1558                 }
1559
1560                 path.slots[0]++;
1561         }
1562 out:
1563         btrfs_release_path(&path);
1564         if (ret < 0)
1565                 return ret;
1566         return has_parent ? 0 : 2;
1567 }
1568
1569 static int process_dir_item(struct extent_buffer *eb,
1570                             int slot, struct btrfs_key *key,
1571                             struct shared_node *active_node)
1572 {
1573         u32 total;
1574         u32 cur = 0;
1575         u32 len;
1576         u32 name_len;
1577         u32 data_len;
1578         int error;
1579         int nritems = 0;
1580         u8 filetype;
1581         struct btrfs_dir_item *di;
1582         struct inode_record *rec;
1583         struct cache_tree *root_cache;
1584         struct cache_tree *inode_cache;
1585         struct btrfs_key location;
1586         char namebuf[BTRFS_NAME_LEN];
1587
1588         root_cache = &active_node->root_cache;
1589         inode_cache = &active_node->inode_cache;
1590         rec = active_node->current;
1591         rec->found_dir_item = 1;
1592
1593         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1594         total = btrfs_item_size_nr(eb, slot);
1595         while (cur < total) {
1596                 nritems++;
1597                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1598                 name_len = btrfs_dir_name_len(eb, di);
1599                 data_len = btrfs_dir_data_len(eb, di);
1600                 filetype = btrfs_dir_type(eb, di);
1601
1602                 rec->found_size += name_len;
1603                 if (cur + sizeof(*di) + name_len > total ||
1604                     name_len > BTRFS_NAME_LEN) {
1605                         error = REF_ERR_NAME_TOO_LONG;
1606
1607                         if (cur + sizeof(*di) > total)
1608                                 break;
1609                         len = min_t(u32, total - cur - sizeof(*di),
1610                                     BTRFS_NAME_LEN);
1611                 } else {
1612                         len = name_len;
1613                         error = 0;
1614                 }
1615
1616                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1617
1618                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1619                     key->offset != btrfs_name_hash(namebuf, len)) {
1620                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1621                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1622                         key->objectid, key->offset, namebuf, len, filetype,
1623                         key->offset, btrfs_name_hash(namebuf, len));
1624                 }
1625
1626                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1627                         add_inode_backref(inode_cache, location.objectid,
1628                                           key->objectid, key->offset, namebuf,
1629                                           len, filetype, key->type, error);
1630                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1631                         add_inode_backref(root_cache, location.objectid,
1632                                           key->objectid, key->offset,
1633                                           namebuf, len, filetype,
1634                                           key->type, error);
1635                 } else {
1636                         fprintf(stderr, "invalid location in dir item %u\n",
1637                                 location.type);
1638                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1639                                           key->objectid, key->offset, namebuf,
1640                                           len, filetype, key->type, error);
1641                 }
1642
1643                 len = sizeof(*di) + name_len + data_len;
1644                 di = (struct btrfs_dir_item *)((char *)di + len);
1645                 cur += len;
1646         }
1647         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1648                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1649
1650         return 0;
1651 }
1652
1653 static int process_inode_ref(struct extent_buffer *eb,
1654                              int slot, struct btrfs_key *key,
1655                              struct shared_node *active_node)
1656 {
1657         u32 total;
1658         u32 cur = 0;
1659         u32 len;
1660         u32 name_len;
1661         u64 index;
1662         int error;
1663         struct cache_tree *inode_cache;
1664         struct btrfs_inode_ref *ref;
1665         char namebuf[BTRFS_NAME_LEN];
1666
1667         inode_cache = &active_node->inode_cache;
1668
1669         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1670         total = btrfs_item_size_nr(eb, slot);
1671         while (cur < total) {
1672                 name_len = btrfs_inode_ref_name_len(eb, ref);
1673                 index = btrfs_inode_ref_index(eb, ref);
1674
1675                 /* inode_ref + namelen should not cross item boundary */
1676                 if (cur + sizeof(*ref) + name_len > total ||
1677                     name_len > BTRFS_NAME_LEN) {
1678                         if (total < cur + sizeof(*ref))
1679                                 break;
1680
1681                         /* Still try to read out the remaining part */
1682                         len = min_t(u32, total - cur - sizeof(*ref),
1683                                     BTRFS_NAME_LEN);
1684                         error = REF_ERR_NAME_TOO_LONG;
1685                 } else {
1686                         len = name_len;
1687                         error = 0;
1688                 }
1689
1690                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1691                 add_inode_backref(inode_cache, key->objectid, key->offset,
1692                                   index, namebuf, len, 0, key->type, error);
1693
1694                 len = sizeof(*ref) + name_len;
1695                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1696                 cur += len;
1697         }
1698         return 0;
1699 }
1700
1701 static int process_inode_extref(struct extent_buffer *eb,
1702                                 int slot, struct btrfs_key *key,
1703                                 struct shared_node *active_node)
1704 {
1705         u32 total;
1706         u32 cur = 0;
1707         u32 len;
1708         u32 name_len;
1709         u64 index;
1710         u64 parent;
1711         int error;
1712         struct cache_tree *inode_cache;
1713         struct btrfs_inode_extref *extref;
1714         char namebuf[BTRFS_NAME_LEN];
1715
1716         inode_cache = &active_node->inode_cache;
1717
1718         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1719         total = btrfs_item_size_nr(eb, slot);
1720         while (cur < total) {
1721                 name_len = btrfs_inode_extref_name_len(eb, extref);
1722                 index = btrfs_inode_extref_index(eb, extref);
1723                 parent = btrfs_inode_extref_parent(eb, extref);
1724                 if (name_len <= BTRFS_NAME_LEN) {
1725                         len = name_len;
1726                         error = 0;
1727                 } else {
1728                         len = BTRFS_NAME_LEN;
1729                         error = REF_ERR_NAME_TOO_LONG;
1730                 }
1731                 read_extent_buffer(eb, namebuf,
1732                                    (unsigned long)(extref + 1), len);
1733                 add_inode_backref(inode_cache, key->objectid, parent,
1734                                   index, namebuf, len, 0, key->type, error);
1735
1736                 len = sizeof(*extref) + name_len;
1737                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1738                 cur += len;
1739         }
1740         return 0;
1741
1742 }
1743
1744 static int count_csum_range(struct btrfs_root *root, u64 start,
1745                             u64 len, u64 *found)
1746 {
1747         struct btrfs_key key;
1748         struct btrfs_path path;
1749         struct extent_buffer *leaf;
1750         int ret;
1751         size_t size;
1752         *found = 0;
1753         u64 csum_end;
1754         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1755
1756         btrfs_init_path(&path);
1757
1758         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1759         key.offset = start;
1760         key.type = BTRFS_EXTENT_CSUM_KEY;
1761
1762         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1763                                 &key, &path, 0, 0);
1764         if (ret < 0)
1765                 goto out;
1766         if (ret > 0 && path.slots[0] > 0) {
1767                 leaf = path.nodes[0];
1768                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1769                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1770                     key.type == BTRFS_EXTENT_CSUM_KEY)
1771                         path.slots[0]--;
1772         }
1773
1774         while (len > 0) {
1775                 leaf = path.nodes[0];
1776                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1777                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1778                         if (ret > 0)
1779                                 break;
1780                         else if (ret < 0)
1781                                 goto out;
1782                         leaf = path.nodes[0];
1783                 }
1784
1785                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1786                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1787                     key.type != BTRFS_EXTENT_CSUM_KEY)
1788                         break;
1789
1790                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1791                 if (key.offset >= start + len)
1792                         break;
1793
1794                 if (key.offset > start)
1795                         start = key.offset;
1796
1797                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1798                 csum_end = key.offset + (size / csum_size) *
1799                            root->fs_info->sectorsize;
1800                 if (csum_end > start) {
1801                         size = min(csum_end - start, len);
1802                         len -= size;
1803                         start += size;
1804                         *found += size;
1805                 }
1806
1807                 path.slots[0]++;
1808         }
1809 out:
1810         btrfs_release_path(&path);
1811         if (ret < 0)
1812                 return ret;
1813         return 0;
1814 }
1815
1816 static int process_file_extent(struct btrfs_root *root,
1817                                 struct extent_buffer *eb,
1818                                 int slot, struct btrfs_key *key,
1819                                 struct shared_node *active_node)
1820 {
1821         struct inode_record *rec;
1822         struct btrfs_file_extent_item *fi;
1823         u64 num_bytes = 0;
1824         u64 disk_bytenr = 0;
1825         u64 extent_offset = 0;
1826         u64 mask = root->fs_info->sectorsize - 1;
1827         int extent_type;
1828         int ret;
1829
1830         rec = active_node->current;
1831         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1832         rec->found_file_extent = 1;
1833
1834         if (rec->extent_start == (u64)-1) {
1835                 rec->extent_start = key->offset;
1836                 rec->extent_end = key->offset;
1837         }
1838
1839         if (rec->extent_end > key->offset)
1840                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1841         else if (rec->extent_end < key->offset) {
1842                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1843                                            key->offset - rec->extent_end);
1844                 if (ret < 0)
1845                         return ret;
1846         }
1847
1848         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1849         extent_type = btrfs_file_extent_type(eb, fi);
1850
1851         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1852                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1853                 if (num_bytes == 0)
1854                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1855                 rec->found_size += num_bytes;
1856                 num_bytes = (num_bytes + mask) & ~mask;
1857         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1858                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1859                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1860                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1861                 extent_offset = btrfs_file_extent_offset(eb, fi);
1862                 if (num_bytes == 0 || (num_bytes & mask))
1863                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1864                 if (num_bytes + extent_offset >
1865                     btrfs_file_extent_ram_bytes(eb, fi))
1866                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1867                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1868                     (btrfs_file_extent_compression(eb, fi) ||
1869                      btrfs_file_extent_encryption(eb, fi) ||
1870                      btrfs_file_extent_other_encoding(eb, fi)))
1871                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1872                 if (disk_bytenr > 0)
1873                         rec->found_size += num_bytes;
1874         } else {
1875                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1876         }
1877         rec->extent_end = key->offset + num_bytes;
1878
1879         /*
1880          * The data reloc tree will copy full extents into its inode and then
1881          * copy the corresponding csums.  Because the extent it copied could be
1882          * a preallocated extent that hasn't been written to yet there may be no
1883          * csums to copy, ergo we won't have csums for our file extent.  This is
1884          * ok so just don't bother checking csums if the inode belongs to the
1885          * data reloc tree.
1886          */
1887         if (disk_bytenr > 0 &&
1888             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1889                 u64 found;
1890                 if (btrfs_file_extent_compression(eb, fi))
1891                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1892                 else
1893                         disk_bytenr += extent_offset;
1894
1895                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1896                 if (ret < 0)
1897                         return ret;
1898                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1899                         if (found > 0)
1900                                 rec->found_csum_item = 1;
1901                         if (found < num_bytes)
1902                                 rec->some_csum_missing = 1;
1903                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1904                         if (found > 0)
1905                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1906                 }
1907         }
1908         return 0;
1909 }
1910
1911 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1912                             struct walk_control *wc)
1913 {
1914         struct btrfs_key key;
1915         u32 nritems;
1916         int i;
1917         int ret = 0;
1918         struct cache_tree *inode_cache;
1919         struct shared_node *active_node;
1920
1921         if (wc->root_level == wc->active_node &&
1922             btrfs_root_refs(&root->root_item) == 0)
1923                 return 0;
1924
1925         active_node = wc->nodes[wc->active_node];
1926         inode_cache = &active_node->inode_cache;
1927         nritems = btrfs_header_nritems(eb);
1928         for (i = 0; i < nritems; i++) {
1929                 btrfs_item_key_to_cpu(eb, &key, i);
1930
1931                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1932                         continue;
1933                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1934                         continue;
1935
1936                 if (active_node->current == NULL ||
1937                     active_node->current->ino < key.objectid) {
1938                         if (active_node->current) {
1939                                 active_node->current->checked = 1;
1940                                 maybe_free_inode_rec(inode_cache,
1941                                                      active_node->current);
1942                         }
1943                         active_node->current = get_inode_rec(inode_cache,
1944                                                              key.objectid, 1);
1945                         BUG_ON(IS_ERR(active_node->current));
1946                 }
1947                 switch (key.type) {
1948                 case BTRFS_DIR_ITEM_KEY:
1949                 case BTRFS_DIR_INDEX_KEY:
1950                         ret = process_dir_item(eb, i, &key, active_node);
1951                         break;
1952                 case BTRFS_INODE_REF_KEY:
1953                         ret = process_inode_ref(eb, i, &key, active_node);
1954                         break;
1955                 case BTRFS_INODE_EXTREF_KEY:
1956                         ret = process_inode_extref(eb, i, &key, active_node);
1957                         break;
1958                 case BTRFS_INODE_ITEM_KEY:
1959                         ret = process_inode_item(eb, i, &key, active_node);
1960                         break;
1961                 case BTRFS_EXTENT_DATA_KEY:
1962                         ret = process_file_extent(root, eb, i, &key,
1963                                                   active_node);
1964                         break;
1965                 default:
1966                         break;
1967                 };
1968         }
1969         return ret;
1970 }
1971
/*
 * Per-level cache of extent reference information collected while walking a
 * tree.  Each array is indexed by tree level.
 */
struct node_refs {
        /* Start bytenr of the tree block last looked up at this level */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* Reference count recorded for that tree block */
        u64 refs[BTRFS_MAX_LEVEL];
        /* Non-zero if that tree block has to be checked from the current root */
        int need_check[BTRFS_MAX_LEVEL];
};
1977
/*
 * Forward declarations, needed because process_one_leaf_v2() below calls
 * both functions.
 */
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
                             struct node_refs *nrefs, u64 level);
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
1982
1983 /*
1984  * Returns >0  Found error, not fatal, should continue
1985  * Returns <0  Fatal error, must exit the whole check
1986  * Returns 0   No errors found
1987  */
1988 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1989                                struct node_refs *nrefs, int *level, int ext_ref)
1990 {
1991         struct extent_buffer *cur = path->nodes[0];
1992         struct btrfs_key key;
1993         u64 cur_bytenr;
1994         u32 nritems;
1995         u64 first_ino = 0;
1996         int root_level = btrfs_header_level(root->node);
1997         int i;
1998         int ret = 0; /* Final return value */
1999         int err = 0; /* Positive error bitmap */
2000
2001         cur_bytenr = cur->start;
2002
2003         /* skip to first inode item or the first inode number change */
2004         nritems = btrfs_header_nritems(cur);
2005         for (i = 0; i < nritems; i++) {
2006                 btrfs_item_key_to_cpu(cur, &key, i);
2007                 if (i == 0)
2008                         first_ino = key.objectid;
2009                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2010                     (first_ino && first_ino != key.objectid))
2011                         break;
2012         }
2013         if (i == nritems) {
2014                 path->slots[0] = nritems;
2015                 return 0;
2016         }
2017         path->slots[0] = i;
2018
2019 again:
2020         err |= check_inode_item(root, path, ext_ref);
2021
2022         /* modify cur since check_inode_item may change path */
2023         cur = path->nodes[0];
2024
2025         if (err & LAST_ITEM)
2026                 goto out;
2027
2028         /* still have inode items in thie leaf */
2029         if (cur->start == cur_bytenr)
2030                 goto again;
2031
2032         /*
2033          * we have switched to another leaf, above nodes may
2034          * have changed, here walk down the path, if a node
2035          * or leaf is shared, check whether we can skip this
2036          * node or leaf.
2037          */
2038         for (i = root_level; i >= 0; i--) {
2039                 if (path->nodes[i]->start == nrefs->bytenr[i])
2040                         continue;
2041
2042                 ret = update_nodes_refs(root,
2043                                 path->nodes[i]->start,
2044                                 nrefs, i);
2045                 if (ret)
2046                         goto out;
2047
2048                 if (!nrefs->need_check[i]) {
2049                         *level += 1;
2050                         break;
2051                 }
2052         }
2053
2054         for (i = 0; i < *level; i++) {
2055                 free_extent_buffer(path->nodes[i]);
2056                 path->nodes[i] = NULL;
2057         }
2058 out:
2059         err &= ~LAST_ITEM;
2060         if (err && !ret)
2061                 ret = err;
2062         return ret;
2063 }
2064
2065 static void reada_walk_down(struct btrfs_root *root,
2066                             struct extent_buffer *node, int slot)
2067 {
2068         struct btrfs_fs_info *fs_info = root->fs_info;
2069         u64 bytenr;
2070         u64 ptr_gen;
2071         u32 nritems;
2072         int i;
2073         int level;
2074
2075         level = btrfs_header_level(node);
2076         if (level != 1)
2077                 return;
2078
2079         nritems = btrfs_header_nritems(node);
2080         for (i = slot; i < nritems; i++) {
2081                 bytenr = btrfs_node_blockptr(node, i);
2082                 ptr_gen = btrfs_node_ptr_generation(node, i);
2083                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2084         }
2085 }
2086
2087 /*
2088  * Check the child node/leaf by the following condition:
2089  * 1. the first item key of the node/leaf should be the same with the one
2090  *    in parent.
2091  * 2. block in parent node should match the child node/leaf.
2092  * 3. generation of parent node and child's header should be consistent.
2093  *
2094  * Or the child node/leaf pointed by the key in parent is not valid.
2095  *
2096  * We hope to check leaf owner too, but since subvol may share leaves,
2097  * which makes leaf owner check not so strong, key check should be
2098  * sufficient enough for that case.
2099  */
2100 static int check_child_node(struct extent_buffer *parent, int slot,
2101                             struct extent_buffer *child)
2102 {
2103         struct btrfs_key parent_key;
2104         struct btrfs_key child_key;
2105         int ret = 0;
2106
2107         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2108         if (btrfs_header_level(child) == 0)
2109                 btrfs_item_key_to_cpu(child, &child_key, 0);
2110         else
2111                 btrfs_node_key_to_cpu(child, &child_key, 0);
2112
2113         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2114                 ret = -EINVAL;
2115                 fprintf(stderr,
2116                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2117                         parent_key.objectid, parent_key.type, parent_key.offset,
2118                         child_key.objectid, child_key.type, child_key.offset);
2119         }
2120         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2121                 ret = -EINVAL;
2122                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2123                         btrfs_node_blockptr(parent, slot),
2124                         btrfs_header_bytenr(child));
2125         }
2126         if (btrfs_node_ptr_generation(parent, slot) !=
2127             btrfs_header_generation(child)) {
2128                 ret = -EINVAL;
2129                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2130                         btrfs_header_generation(child),
2131                         btrfs_node_ptr_generation(parent, slot));
2132         }
2133         return ret;
2134 }
2135
2136 /*
2137  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2138  * in every fs or file tree check. Here we find its all root ids, and only check
2139  * it in the fs or file tree which has the smallest root id.
2140  */
2141 static int need_check(struct btrfs_root *root, struct ulist *roots)
2142 {
2143         struct rb_node *node;
2144         struct ulist_node *u;
2145
2146         if (roots->nnodes == 1)
2147                 return 1;
2148
2149         node = rb_first(&roots->root);
2150         u = rb_entry(node, struct ulist_node, rb_node);
2151         /*
2152          * current root id is not smallest, we skip it and let it be checked
2153          * in the fs or file tree who hash the smallest root id.
2154          */
2155         if (root->objectid != u->val)
2156                 return 0;
2157
2158         return 1;
2159 }
2160
2161 /*
2162  * for a tree node or leaf, we record its reference count, so later if we still
2163  * process this node or leaf, don't need to compute its reference count again.
2164  */
2165 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2166                              struct node_refs *nrefs, u64 level)
2167 {
2168         int check, ret;
2169         u64 refs;
2170         struct ulist *roots;
2171
2172         if (nrefs->bytenr[level] != bytenr) {
2173                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2174                                        level, 1, &refs, NULL);
2175                 if (ret < 0)
2176                         return ret;
2177
2178                 nrefs->bytenr[level] = bytenr;
2179                 nrefs->refs[level] = refs;
2180                 if (refs > 1) {
2181                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2182                                                    0, &roots);
2183                         if (ret)
2184                                 return -EIO;
2185
2186                         check = need_check(root, roots);
2187                         ulist_free(roots);
2188                         nrefs->need_check[level] = check;
2189                 } else {
2190                         nrefs->need_check[level] = 1;
2191                 }
2192         }
2193
2194         return 0;
2195 }
2196
2197 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2198                           struct walk_control *wc, int *level,
2199                           struct node_refs *nrefs)
2200 {
2201         enum btrfs_tree_block_status status;
2202         u64 bytenr;
2203         u64 ptr_gen;
2204         struct btrfs_fs_info *fs_info = root->fs_info;
2205         struct extent_buffer *next;
2206         struct extent_buffer *cur;
2207         int ret, err = 0;
2208         u64 refs;
2209
2210         WARN_ON(*level < 0);
2211         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2212
2213         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2214                 refs = nrefs->refs[*level];
2215                 ret = 0;
2216         } else {
2217                 ret = btrfs_lookup_extent_info(NULL, root,
2218                                        path->nodes[*level]->start,
2219                                        *level, 1, &refs, NULL);
2220                 if (ret < 0) {
2221                         err = ret;
2222                         goto out;
2223                 }
2224                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2225                 nrefs->refs[*level] = refs;
2226         }
2227
2228         if (refs > 1) {
2229                 ret = enter_shared_node(root, path->nodes[*level]->start,
2230                                         refs, wc, *level);
2231                 if (ret > 0) {
2232                         err = ret;
2233                         goto out;
2234                 }
2235         }
2236
2237         while (*level >= 0) {
2238                 WARN_ON(*level < 0);
2239                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2240                 cur = path->nodes[*level];
2241
2242                 if (btrfs_header_level(cur) != *level)
2243                         WARN_ON(1);
2244
2245                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2246                         break;
2247                 if (*level == 0) {
2248                         ret = process_one_leaf(root, cur, wc);
2249                         if (ret < 0)
2250                                 err = ret;
2251                         break;
2252                 }
2253                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2254                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2255
2256                 if (bytenr == nrefs->bytenr[*level - 1]) {
2257                         refs = nrefs->refs[*level - 1];
2258                 } else {
2259                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2260                                         *level - 1, 1, &refs, NULL);
2261                         if (ret < 0) {
2262                                 refs = 0;
2263                         } else {
2264                                 nrefs->bytenr[*level - 1] = bytenr;
2265                                 nrefs->refs[*level - 1] = refs;
2266                         }
2267                 }
2268
2269                 if (refs > 1) {
2270                         ret = enter_shared_node(root, bytenr, refs,
2271                                                 wc, *level - 1);
2272                         if (ret > 0) {
2273                                 path->slots[*level]++;
2274                                 continue;
2275                         }
2276                 }
2277
2278                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2279                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2280                         free_extent_buffer(next);
2281                         reada_walk_down(root, cur, path->slots[*level]);
2282                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2283                         if (!extent_buffer_uptodate(next)) {
2284                                 struct btrfs_key node_key;
2285
2286                                 btrfs_node_key_to_cpu(path->nodes[*level],
2287                                                       &node_key,
2288                                                       path->slots[*level]);
2289                                 btrfs_add_corrupt_extent_record(root->fs_info,
2290                                                 &node_key,
2291                                                 path->nodes[*level]->start,
2292                                                 root->fs_info->nodesize,
2293                                                 *level);
2294                                 err = -EIO;
2295                                 goto out;
2296                         }
2297                 }
2298
2299                 ret = check_child_node(cur, path->slots[*level], next);
2300                 if (ret) {
2301                         free_extent_buffer(next);
2302                         err = ret;
2303                         goto out;
2304                 }
2305
2306                 if (btrfs_is_leaf(next))
2307                         status = btrfs_check_leaf(root, NULL, next);
2308                 else
2309                         status = btrfs_check_node(root, NULL, next);
2310                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2311                         free_extent_buffer(next);
2312                         err = -EIO;
2313                         goto out;
2314                 }
2315
2316                 *level = *level - 1;
2317                 free_extent_buffer(path->nodes[*level]);
2318                 path->nodes[*level] = next;
2319                 path->slots[*level] = 0;
2320         }
2321 out:
2322         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2323         return err;
2324 }
2325
2326 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2327                             unsigned int ext_ref);
2328
2329 /*
2330  * Returns >0  Found error, should continue
2331  * Returns <0  Fatal error, must exit the whole check
2332  * Returns 0   No errors found
2333  */
2334 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2335                              int *level, struct node_refs *nrefs, int ext_ref)
2336 {
2337         enum btrfs_tree_block_status status;
2338         u64 bytenr;
2339         u64 ptr_gen;
2340         struct btrfs_fs_info *fs_info = root->fs_info;
2341         struct extent_buffer *next;
2342         struct extent_buffer *cur;
2343         int ret;
2344
2345         WARN_ON(*level < 0);
2346         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2347
2348         ret = update_nodes_refs(root, path->nodes[*level]->start,
2349                                 nrefs, *level);
2350         if (ret < 0)
2351                 return ret;
2352
2353         while (*level >= 0) {
2354                 WARN_ON(*level < 0);
2355                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2356                 cur = path->nodes[*level];
2357
2358                 if (btrfs_header_level(cur) != *level)
2359                         WARN_ON(1);
2360
2361                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2362                         break;
2363                 /* Don't forgot to check leaf/node validation */
2364                 if (*level == 0) {
2365                         ret = btrfs_check_leaf(root, NULL, cur);
2366                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2367                                 ret = -EIO;
2368                                 break;
2369                         }
2370                         ret = process_one_leaf_v2(root, path, nrefs,
2371                                                   level, ext_ref);
2372                         cur = path->nodes[*level];
2373                         break;
2374                 } else {
2375                         ret = btrfs_check_node(root, NULL, cur);
2376                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2377                                 ret = -EIO;
2378                                 break;
2379                         }
2380                 }
2381                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2382                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2383
2384                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2385                 if (ret)
2386                         break;
2387                 if (!nrefs->need_check[*level - 1]) {
2388                         path->slots[*level]++;
2389                         continue;
2390                 }
2391
2392                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2393                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2394                         free_extent_buffer(next);
2395                         reada_walk_down(root, cur, path->slots[*level]);
2396                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2397                         if (!extent_buffer_uptodate(next)) {
2398                                 struct btrfs_key node_key;
2399
2400                                 btrfs_node_key_to_cpu(path->nodes[*level],
2401                                                       &node_key,
2402                                                       path->slots[*level]);
2403                                 btrfs_add_corrupt_extent_record(fs_info,
2404                                                 &node_key,
2405                                                 path->nodes[*level]->start,
2406                                                 fs_info->nodesize,
2407                                                 *level);
2408                                 ret = -EIO;
2409                                 break;
2410                         }
2411                 }
2412
2413                 ret = check_child_node(cur, path->slots[*level], next);
2414                 if (ret < 0) 
2415                         break;
2416
2417                 if (btrfs_is_leaf(next))
2418                         status = btrfs_check_leaf(root, NULL, next);
2419                 else
2420                         status = btrfs_check_node(root, NULL, next);
2421                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2422                         free_extent_buffer(next);
2423                         ret = -EIO;
2424                         break;
2425                 }
2426
2427                 *level = *level - 1;
2428                 free_extent_buffer(path->nodes[*level]);
2429                 path->nodes[*level] = next;
2430                 path->slots[*level] = 0;
2431         }
2432         return ret;
2433 }
2434
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436                         struct walk_control *wc, int *level)
2437 {
2438         int i;
2439         struct extent_buffer *leaf;
2440
2441         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2442                 leaf = path->nodes[i];
2443                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2444                         path->slots[i]++;
2445                         *level = i;
2446                         return 0;
2447                 } else {
2448                         free_extent_buffer(path->nodes[*level]);
2449                         path->nodes[*level] = NULL;
2450                         BUG_ON(*level > wc->active_node);
2451                         if (*level == wc->active_node)
2452                                 leave_shared_node(root, wc, *level);
2453                         *level = i + 1;
2454                 }
2455         }
2456         return 1;
2457 }
2458
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2460                            int *level)
2461 {
2462         int i;
2463         struct extent_buffer *leaf;
2464
2465         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2466                 leaf = path->nodes[i];
2467                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2468                         path->slots[i]++;
2469                         *level = i;
2470                         return 0;
2471                 } else {
2472                         free_extent_buffer(path->nodes[*level]);
2473                         path->nodes[*level] = NULL;
2474                         *level = i + 1;
2475                 }
2476         }
2477         return 1;
2478 }
2479
2480 static int check_root_dir(struct inode_record *rec)
2481 {
2482         struct inode_backref *backref;
2483         int ret = -1;
2484
2485         if (!rec->found_inode_item || rec->errors)
2486                 goto out;
2487         if (rec->nlink != 1 || rec->found_link != 0)
2488                 goto out;
2489         if (list_empty(&rec->backrefs))
2490                 goto out;
2491         backref = to_inode_backref(rec->backrefs.next);
2492         if (!backref->found_inode_ref)
2493                 goto out;
2494         if (backref->index != 0 || backref->namelen != 2 ||
2495             memcmp(backref->name, "..", 2))
2496                 goto out;
2497         if (backref->found_dir_index || backref->found_dir_item)
2498                 goto out;
2499         ret = 0;
2500 out:
2501         return ret;
2502 }
2503
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505                               struct btrfs_root *root, struct btrfs_path *path,
2506                               struct inode_record *rec)
2507 {
2508         struct btrfs_inode_item *ei;
2509         struct btrfs_key key;
2510         int ret;
2511
2512         key.objectid = rec->ino;
2513         key.type = BTRFS_INODE_ITEM_KEY;
2514         key.offset = (u64)-1;
2515
2516         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2517         if (ret < 0)
2518                 goto out;
2519         if (ret) {
2520                 if (!path->slots[0]) {
2521                         ret = -ENOENT;
2522                         goto out;
2523                 }
2524                 path->slots[0]--;
2525                 ret = 0;
2526         }
2527         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2528         if (key.objectid != rec->ino) {
2529                 ret = -ENOENT;
2530                 goto out;
2531         }
2532
2533         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534                             struct btrfs_inode_item);
2535         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536         btrfs_mark_buffer_dirty(path->nodes[0]);
2537         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2539                root->root_key.objectid);
2540 out:
2541         btrfs_release_path(path);
2542         return ret;
2543 }
2544
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546                                     struct btrfs_root *root,
2547                                     struct btrfs_path *path,
2548                                     struct inode_record *rec)
2549 {
2550         int ret;
2551
2552         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553         btrfs_release_path(path);
2554         if (!ret)
2555                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2556         return ret;
2557 }
2558
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560                                struct btrfs_root *root,
2561                                struct btrfs_path *path,
2562                                struct inode_record *rec)
2563 {
2564         struct btrfs_inode_item *ei;
2565         struct btrfs_key key;
2566         int ret = 0;
2567
2568         key.objectid = rec->ino;
2569         key.type = BTRFS_INODE_ITEM_KEY;
2570         key.offset = 0;
2571
2572         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2573         if (ret) {
2574                 if (ret > 0)
2575                         ret = -ENOENT;
2576                 goto out;
2577         }
2578
2579         /* Since ret == 0, no need to check anything */
2580         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581                             struct btrfs_inode_item);
2582         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583         btrfs_mark_buffer_dirty(path->nodes[0]);
2584         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585         printf("reset nbytes for ino %llu root %llu\n",
2586                rec->ino, root->root_key.objectid);
2587 out:
2588         btrfs_release_path(path);
2589         return ret;
2590 }
2591
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593                                  struct cache_tree *inode_cache,
2594                                  struct inode_record *rec,
2595                                  struct inode_backref *backref)
2596 {
2597         struct btrfs_path path;
2598         struct btrfs_trans_handle *trans;
2599         struct btrfs_dir_item *dir_item;
2600         struct extent_buffer *leaf;
2601         struct btrfs_key key;
2602         struct btrfs_disk_key disk_key;
2603         struct inode_record *dir_rec;
2604         unsigned long name_ptr;
2605         u32 data_size = sizeof(*dir_item) + backref->namelen;
2606         int ret;
2607
2608         trans = btrfs_start_transaction(root, 1);
2609         if (IS_ERR(trans))
2610                 return PTR_ERR(trans);
2611
2612         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613                 (unsigned long long)rec->ino);
2614
2615         btrfs_init_path(&path);
2616         key.objectid = backref->dir;
2617         key.type = BTRFS_DIR_INDEX_KEY;
2618         key.offset = backref->index;
2619         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2620         BUG_ON(ret);
2621
2622         leaf = path.nodes[0];
2623         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2624
2625         disk_key.objectid = cpu_to_le64(rec->ino);
2626         disk_key.type = BTRFS_INODE_ITEM_KEY;
2627         disk_key.offset = 0;
2628
2629         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631         btrfs_set_dir_data_len(leaf, dir_item, 0);
2632         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2633         name_ptr = (unsigned long)(dir_item + 1);
2634         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635         btrfs_mark_buffer_dirty(leaf);
2636         btrfs_release_path(&path);
2637         btrfs_commit_transaction(trans, root);
2638
2639         backref->found_dir_index = 1;
2640         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641         BUG_ON(IS_ERR(dir_rec));
2642         if (!dir_rec)
2643                 return 0;
2644         dir_rec->found_size += backref->namelen;
2645         if (dir_rec->found_size == dir_rec->isize &&
2646             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648         if (dir_rec->found_size != dir_rec->isize)
2649                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2650
2651         return 0;
2652 }
2653
2654 static int delete_dir_index(struct btrfs_root *root,
2655                             struct inode_backref *backref)
2656 {
2657         struct btrfs_trans_handle *trans;
2658         struct btrfs_dir_item *di;
2659         struct btrfs_path path;
2660         int ret = 0;
2661
2662         trans = btrfs_start_transaction(root, 1);
2663         if (IS_ERR(trans))
2664                 return PTR_ERR(trans);
2665
2666         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667                 (unsigned long long)backref->dir,
2668                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669                 (unsigned long long)root->objectid);
2670
2671         btrfs_init_path(&path);
2672         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673                                     backref->name, backref->namelen,
2674                                     backref->index, -1);
2675         if (IS_ERR(di)) {
2676                 ret = PTR_ERR(di);
2677                 btrfs_release_path(&path);
2678                 btrfs_commit_transaction(trans, root);
2679                 if (ret == -ENOENT)
2680                         return 0;
2681                 return ret;
2682         }
2683
2684         if (!di)
2685                 ret = btrfs_del_item(trans, root, &path);
2686         else
2687                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2688         BUG_ON(ret);
2689         btrfs_release_path(&path);
2690         btrfs_commit_transaction(trans, root);
2691         return ret;
2692 }
2693
2694 static int __create_inode_item(struct btrfs_trans_handle *trans,
2695                                struct btrfs_root *root, u64 ino, u64 size,
2696                                u64 nbytes, u64 nlink, u32 mode)
2697 {
2698         struct btrfs_inode_item ii;
2699         time_t now = time(NULL);
2700         int ret;
2701
2702         btrfs_set_stack_inode_size(&ii, size);
2703         btrfs_set_stack_inode_nbytes(&ii, nbytes);
2704         btrfs_set_stack_inode_nlink(&ii, nlink);
2705         btrfs_set_stack_inode_mode(&ii, mode);
2706         btrfs_set_stack_inode_generation(&ii, trans->transid);
2707         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2708         btrfs_set_stack_timespec_sec(&ii.ctime, now);
2709         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2710         btrfs_set_stack_timespec_sec(&ii.mtime, now);
2711         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2712         btrfs_set_stack_timespec_sec(&ii.otime, 0);
2713         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2714
2715         ret = btrfs_insert_inode(trans, root, ino, &ii);
2716         ASSERT(!ret);
2717
2718         warning("root %llu inode %llu recreating inode item, this may "
2719                 "be incomplete, please check permissions and content after "
2720                 "the fsck completes.\n", (unsigned long long)root->objectid,
2721                 (unsigned long long)ino);
2722
2723         return 0;
2724 }
2725
2726 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2727                                     struct btrfs_root *root, u64 ino,
2728                                     u8 filetype)
2729 {
2730         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2731
2732         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
2733 }
2734
2735 static int create_inode_item(struct btrfs_root *root,
2736                              struct inode_record *rec, int root_dir)
2737 {
2738         struct btrfs_trans_handle *trans;
2739         u64 nlink = 0;
2740         u32 mode = 0;
2741         u64 size = 0;
2742         int ret;
2743
2744         trans = btrfs_start_transaction(root, 1);
2745         if (IS_ERR(trans)) {
2746                 ret = PTR_ERR(trans);
2747                 return ret;
2748         }
2749
2750         nlink = root_dir ? 1 : rec->found_link;
2751         if (rec->found_dir_item) {
2752                 if (rec->found_file_extent)
2753                         fprintf(stderr, "root %llu inode %llu has both a dir "
2754                                 "item and extents, unsure if it is a dir or a "
2755                                 "regular file so setting it as a directory\n",
2756                                 (unsigned long long)root->objectid,
2757                                 (unsigned long long)rec->ino);
2758                 mode = S_IFDIR | 0755;
2759                 size = rec->found_size;
2760         } else if (!rec->found_dir_item) {
2761                 size = rec->extent_end;
2762                 mode =  S_IFREG | 0755;
2763         }
2764
2765         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2766                                   nlink, mode);
2767         btrfs_commit_transaction(trans, root);
2768         return 0;
2769 }
2770
2771 static int repair_inode_backrefs(struct btrfs_root *root,
2772                                  struct inode_record *rec,
2773                                  struct cache_tree *inode_cache,
2774                                  int delete)
2775 {
2776         struct inode_backref *tmp, *backref;
2777         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2778         int ret = 0;
2779         int repaired = 0;
2780
2781         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2782                 if (!delete && rec->ino == root_dirid) {
2783                         if (!rec->found_inode_item) {
2784                                 ret = create_inode_item(root, rec, 1);
2785                                 if (ret)
2786                                         break;
2787                                 repaired++;
2788                         }
2789                 }
2790
2791                 /* Index 0 for root dir's are special, don't mess with it */
2792                 if (rec->ino == root_dirid && backref->index == 0)
2793                         continue;
2794
2795                 if (delete &&
2796                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2797                      (backref->found_dir_index && backref->found_inode_ref &&
2798                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2799                         ret = delete_dir_index(root, backref);
2800                         if (ret)
2801                                 break;
2802                         repaired++;
2803                         list_del(&backref->list);
2804                         free(backref);
2805                         continue;
2806                 }
2807
2808                 if (!delete && !backref->found_dir_index &&
2809                     backref->found_dir_item && backref->found_inode_ref) {
2810                         ret = add_missing_dir_index(root, inode_cache, rec,
2811                                                     backref);
2812                         if (ret)
2813                                 break;
2814                         repaired++;
2815                         if (backref->found_dir_item &&
2816                             backref->found_dir_index) {
2817                                 if (!backref->errors &&
2818                                     backref->found_inode_ref) {
2819                                         list_del(&backref->list);
2820                                         free(backref);
2821                                         continue;
2822                                 }
2823                         }
2824                 }
2825
2826                 if (!delete && (!backref->found_dir_index &&
2827                                 !backref->found_dir_item &&
2828                                 backref->found_inode_ref)) {
2829                         struct btrfs_trans_handle *trans;
2830                         struct btrfs_key location;
2831
2832                         ret = check_dir_conflict(root, backref->name,
2833                                                  backref->namelen,
2834                                                  backref->dir,
2835                                                  backref->index);
2836                         if (ret) {
2837                                 /*
2838                                  * let nlink fixing routine to handle it,
2839                                  * which can do it better.
2840                                  */
2841                                 ret = 0;
2842                                 break;
2843                         }
2844                         location.objectid = rec->ino;
2845                         location.type = BTRFS_INODE_ITEM_KEY;
2846                         location.offset = 0;
2847
2848                         trans = btrfs_start_transaction(root, 1);
2849                         if (IS_ERR(trans)) {
2850                                 ret = PTR_ERR(trans);
2851                                 break;
2852                         }
2853                         fprintf(stderr, "adding missing dir index/item pair "
2854                                 "for inode %llu\n",
2855                                 (unsigned long long)rec->ino);
2856                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2857                                                     backref->namelen,
2858                                                     backref->dir, &location,
2859                                                     imode_to_type(rec->imode),
2860                                                     backref->index);
2861                         BUG_ON(ret);
2862                         btrfs_commit_transaction(trans, root);
2863                         repaired++;
2864                 }
2865
2866                 if (!delete && (backref->found_inode_ref &&
2867                                 backref->found_dir_index &&
2868                                 backref->found_dir_item &&
2869                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2870                                 !rec->found_inode_item)) {
2871                         ret = create_inode_item(root, rec, 0);
2872                         if (ret)
2873                                 break;
2874                         repaired++;
2875                 }
2876
2877         }
2878         return ret ? ret : repaired;
2879 }
2880
2881 /*
2882  * To determine the file type for nlink/inode_item repair
2883  *
2884  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2885  * Return -ENOENT if file type is not found.
2886  */
2887 static int find_file_type(struct inode_record *rec, u8 *type)
2888 {
2889         struct inode_backref *backref;
2890
2891         /* For inode item recovered case */
2892         if (rec->found_inode_item) {
2893                 *type = imode_to_type(rec->imode);
2894                 return 0;
2895         }
2896
2897         list_for_each_entry(backref, &rec->backrefs, list) {
2898                 if (backref->found_dir_index || backref->found_dir_item) {
2899                         *type = backref->filetype;
2900                         return 0;
2901                 }
2902         }
2903         return -ENOENT;
2904 }
2905
2906 /*
2907  * To determine the file name for nlink repair
2908  *
2909  * Return 0 if file name is found, set name and namelen.
2910  * Return -ENOENT if file name is not found.
2911  */
2912 static int find_file_name(struct inode_record *rec,
2913                           char *name, int *namelen)
2914 {
2915         struct inode_backref *backref;
2916
2917         list_for_each_entry(backref, &rec->backrefs, list) {
2918                 if (backref->found_dir_index || backref->found_dir_item ||
2919                     backref->found_inode_ref) {
2920                         memcpy(name, backref->name, backref->namelen);
2921                         *namelen = backref->namelen;
2922                         return 0;
2923                 }
2924         }
2925         return -ENOENT;
2926 }
2927
2928 /* Reset the nlink of the inode to the correct one */
2929 static int reset_nlink(struct btrfs_trans_handle *trans,
2930                        struct btrfs_root *root,
2931                        struct btrfs_path *path,
2932                        struct inode_record *rec)
2933 {
2934         struct inode_backref *backref;
2935         struct inode_backref *tmp;
2936         struct btrfs_key key;
2937         struct btrfs_inode_item *inode_item;
2938         int ret = 0;
2939
2940         /* We don't believe this either, reset it and iterate backref */
2941         rec->found_link = 0;
2942
2943         /* Remove all backref including the valid ones */
2944         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2945                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2946                                    backref->index, backref->name,
2947                                    backref->namelen, 0);
2948                 if (ret < 0)
2949                         goto out;
2950
2951                 /* remove invalid backref, so it won't be added back */
2952                 if (!(backref->found_dir_index &&
2953                       backref->found_dir_item &&
2954                       backref->found_inode_ref)) {
2955                         list_del(&backref->list);
2956                         free(backref);
2957                 } else {
2958                         rec->found_link++;
2959                 }
2960         }
2961
2962         /* Set nlink to 0 */
2963         key.objectid = rec->ino;
2964         key.type = BTRFS_INODE_ITEM_KEY;
2965         key.offset = 0;
2966         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2967         if (ret < 0)
2968                 goto out;
2969         if (ret > 0) {
2970                 ret = -ENOENT;
2971                 goto out;
2972         }
2973         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2974                                     struct btrfs_inode_item);
2975         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2976         btrfs_mark_buffer_dirty(path->nodes[0]);
2977         btrfs_release_path(path);
2978
2979         /*
2980          * Add back valid inode_ref/dir_item/dir_index,
2981          * add_link() will handle the nlink inc, so new nlink must be correct
2982          */
2983         list_for_each_entry(backref, &rec->backrefs, list) {
2984                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2985                                      backref->name, backref->namelen,
2986                                      backref->filetype, &backref->index, 1, 0);
2987                 if (ret < 0)
2988                         goto out;
2989         }
2990 out:
2991         btrfs_release_path(path);
2992         return ret;
2993 }
2994
2995 static int get_highest_inode(struct btrfs_trans_handle *trans,
2996                                 struct btrfs_root *root,
2997                                 struct btrfs_path *path,
2998                                 u64 *highest_ino)
2999 {
3000         struct btrfs_key key, found_key;
3001         int ret;
3002
3003         btrfs_init_path(path);
3004         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3005         key.offset = -1;
3006         key.type = BTRFS_INODE_ITEM_KEY;
3007         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3008         if (ret == 1) {
3009                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3010                                 path->slots[0] - 1);
3011                 *highest_ino = found_key.objectid;
3012                 ret = 0;
3013         }
3014         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3015                 ret = -EOVERFLOW;
3016         btrfs_release_path(path);
3017         return ret;
3018 }
3019
3020 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3021                                struct btrfs_root *root,
3022                                struct btrfs_path *path,
3023                                struct inode_record *rec)
3024 {
3025         char *dir_name = "lost+found";
3026         char namebuf[BTRFS_NAME_LEN] = {0};
3027         u64 lost_found_ino;
3028         u32 mode = 0700;
3029         u8 type = 0;
3030         int namelen = 0;
3031         int name_recovered = 0;
3032         int type_recovered = 0;
3033         int ret = 0;
3034
3035         /*
3036          * Get file name and type first before these invalid inode ref
3037          * are deleted by remove_all_invalid_backref()
3038          */
3039         name_recovered = !find_file_name(rec, namebuf, &namelen);
3040         type_recovered = !find_file_type(rec, &type);
3041
3042         if (!name_recovered) {
3043                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3044                        rec->ino, rec->ino);
3045                 namelen = count_digits(rec->ino);
3046                 sprintf(namebuf, "%llu", rec->ino);
3047                 name_recovered = 1;
3048         }
3049         if (!type_recovered) {
3050                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3051                        rec->ino);
3052                 type = BTRFS_FT_REG_FILE;
3053                 type_recovered = 1;
3054         }
3055
3056         ret = reset_nlink(trans, root, path, rec);
3057         if (ret < 0) {
3058                 fprintf(stderr,
3059                         "Failed to reset nlink for inode %llu: %s\n",
3060                         rec->ino, strerror(-ret));
3061                 goto out;
3062         }
3063
3064         if (rec->found_link == 0) {
3065                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3066                 if (ret < 0)
3067                         goto out;
3068                 lost_found_ino++;
3069                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3070                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3071                                   mode);
3072                 if (ret < 0) {
3073                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
3074                                 dir_name, strerror(-ret));
3075                         goto out;
3076                 }
3077                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3078                                      namebuf, namelen, type, NULL, 1, 0);
3079                 /*
3080                  * Add ".INO" suffix several times to handle case where
3081                  * "FILENAME.INO" is already taken by another file.
3082                  */
3083                 while (ret == -EEXIST) {
3084                         /*
3085                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
3086                          */
3087                         if (namelen + count_digits(rec->ino) + 1 >
3088                             BTRFS_NAME_LEN) {
3089                                 ret = -EFBIG;
3090                                 goto out;
3091                         }
3092                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3093                                  ".%llu", rec->ino);
3094                         namelen += count_digits(rec->ino) + 1;
3095                         ret = btrfs_add_link(trans, root, rec->ino,
3096                                              lost_found_ino, namebuf,
3097                                              namelen, type, NULL, 1, 0);
3098                 }
3099                 if (ret < 0) {
3100                         fprintf(stderr,
3101                                 "Failed to link the inode %llu to %s dir: %s\n",
3102                                 rec->ino, dir_name, strerror(-ret));
3103                         goto out;
3104                 }
3105                 /*
3106                  * Just increase the found_link, don't actually add the
3107                  * backref. This will make things easier and this inode
3108                  * record will be freed after the repair is done.
3109                  * So fsck will not report problem about this inode.
3110                  */
3111                 rec->found_link++;
3112                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3113                        namelen, namebuf, dir_name);
3114         }
3115         printf("Fixed the nlink of inode %llu\n", rec->ino);
3116 out:
3117         /*
3118          * Clear the flag anyway, or we will loop forever for the same inode
3119          * as it will not be removed from the bad inode list and the dead loop
3120          * happens.
3121          */
3122         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3123         btrfs_release_path(path);
3124         return ret;
3125 }
3126
3127 /*
3128  * Check if there is any normal(reg or prealloc) file extent for given
3129  * ino.
3130  * This is used to determine the file type when neither its dir_index/item or
3131  * inode_item exists.
3132  *
3133  * This will *NOT* report error, if any error happens, just consider it does
3134  * not have any normal file extent.
3135  */
3136 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3137 {
3138         struct btrfs_path path;
3139         struct btrfs_key key;
3140         struct btrfs_key found_key;
3141         struct btrfs_file_extent_item *fi;
3142         u8 type;
3143         int ret = 0;
3144
3145         btrfs_init_path(&path);
3146         key.objectid = ino;
3147         key.type = BTRFS_EXTENT_DATA_KEY;
3148         key.offset = 0;
3149
3150         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3151         if (ret < 0) {
3152                 ret = 0;
3153                 goto out;
3154         }
3155         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3156                 ret = btrfs_next_leaf(root, &path);
3157                 if (ret) {
3158                         ret = 0;
3159                         goto out;
3160                 }
3161         }
3162         while (1) {
3163                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3164                                       path.slots[0]);
3165                 if (found_key.objectid != ino ||
3166                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3167                         break;
3168                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3169                                     struct btrfs_file_extent_item);
3170                 type = btrfs_file_extent_type(path.nodes[0], fi);
3171                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3172                         ret = 1;
3173                         goto out;
3174                 }
3175         }
3176 out:
3177         btrfs_release_path(&path);
3178         return ret;
3179 }
3180
3181 static u32 btrfs_type_to_imode(u8 type)
3182 {
3183         static u32 imode_by_btrfs_type[] = {
3184                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3185                 [BTRFS_FT_DIR]          = S_IFDIR,
3186                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3187                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3188                 [BTRFS_FT_FIFO]         = S_IFIFO,
3189                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3190                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3191         };
3192
3193         return imode_by_btrfs_type[(type)];
3194 }
3195
3196 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3197                                 struct btrfs_root *root,
3198                                 struct btrfs_path *path,
3199                                 struct inode_record *rec)
3200 {
3201         u8 filetype;
3202         u32 mode = 0700;
3203         int type_recovered = 0;
3204         int ret = 0;
3205
3206         printf("Trying to rebuild inode:%llu\n", rec->ino);
3207
3208         type_recovered = !find_file_type(rec, &filetype);
3209
3210         /*
3211          * Try to determine inode type if type not found.
3212          *
3213          * For found regular file extent, it must be FILE.
3214          * For found dir_item/index, it must be DIR.
3215          *
3216          * For undetermined one, use FILE as fallback.
3217          *
3218          * TODO:
3219          * 1. If found backref(inode_index/item is already handled) to it,
3220          *    it must be DIR.
3221          *    Need new inode-inode ref structure to allow search for that.
3222          */
3223         if (!type_recovered) {
3224                 if (rec->found_file_extent &&
3225                     find_normal_file_extent(root, rec->ino)) {
3226                         type_recovered = 1;
3227                         filetype = BTRFS_FT_REG_FILE;
3228                 } else if (rec->found_dir_item) {
3229                         type_recovered = 1;
3230                         filetype = BTRFS_FT_DIR;
3231                 } else if (!list_empty(&rec->orphan_extents)) {
3232                         type_recovered = 1;
3233                         filetype = BTRFS_FT_REG_FILE;
3234                 } else{
3235                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3236                                rec->ino);
3237                         type_recovered = 1;
3238                         filetype = BTRFS_FT_REG_FILE;
3239                 }
3240         }
3241
3242         ret = btrfs_new_inode(trans, root, rec->ino,
3243                               mode | btrfs_type_to_imode(filetype));
3244         if (ret < 0)
3245                 goto out;
3246
3247         /*
3248          * Here inode rebuild is done, we only rebuild the inode item,
3249          * don't repair the nlink(like move to lost+found).
3250          * That is the job of nlink repair.
3251          *
3252          * We just fill the record and return
3253          */
3254         rec->found_dir_item = 1;
3255         rec->imode = mode | btrfs_type_to_imode(filetype);
3256         rec->nlink = 0;
3257         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3258         /* Ensure the inode_nlinks repair function will be called */
3259         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3260 out:
3261         return ret;
3262 }
3263
3264 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3265                                       struct btrfs_root *root,
3266                                       struct btrfs_path *path,
3267                                       struct inode_record *rec)
3268 {
3269         struct orphan_data_extent *orphan;
3270         struct orphan_data_extent *tmp;
3271         int ret = 0;
3272
3273         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3274                 /*
3275                  * Check for conflicting file extents
3276                  *
3277                  * Here we don't know whether the extents is compressed or not,
3278                  * so we can only assume it not compressed nor data offset,
3279                  * and use its disk_len as extent length.
3280                  */
3281                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3282                                        orphan->offset, orphan->disk_len, 0);
3283                 btrfs_release_path(path);
3284                 if (ret < 0)
3285                         goto out;
3286                 if (!ret) {
3287                         fprintf(stderr,
3288                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3289                                 orphan->disk_bytenr, orphan->disk_len);
3290                         ret = btrfs_free_extent(trans,
3291                                         root->fs_info->extent_root,
3292                                         orphan->disk_bytenr, orphan->disk_len,
3293                                         0, root->objectid, orphan->objectid,
3294                                         orphan->offset);
3295                         if (ret < 0)
3296                                 goto out;
3297                 }
3298                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3299                                 orphan->offset, orphan->disk_bytenr,
3300                                 orphan->disk_len, orphan->disk_len);
3301                 if (ret < 0)
3302                         goto out;
3303
3304                 /* Update file size info */
3305                 rec->found_size += orphan->disk_len;
3306                 if (rec->found_size == rec->nbytes)
3307                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3308
3309                 /* Update the file extent hole info too */
3310                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3311                                            orphan->disk_len);
3312                 if (ret < 0)
3313                         goto out;
3314                 if (RB_EMPTY_ROOT(&rec->holes))
3315                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3316
3317                 list_del(&orphan->list);
3318                 free(orphan);
3319         }
3320         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3321 out:
3322         return ret;
3323 }
3324
3325 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3326                                         struct btrfs_root *root,
3327                                         struct btrfs_path *path,
3328                                         struct inode_record *rec)
3329 {
3330         struct rb_node *node;
3331         struct file_extent_hole *hole;
3332         int found = 0;
3333         int ret = 0;
3334
3335         node = rb_first(&rec->holes);
3336
3337         while (node) {
3338                 found = 1;
3339                 hole = rb_entry(node, struct file_extent_hole, node);
3340                 ret = btrfs_punch_hole(trans, root, rec->ino,
3341                                        hole->start, hole->len);
3342                 if (ret < 0)
3343                         goto out;
3344                 ret = del_file_extent_hole(&rec->holes, hole->start,
3345                                            hole->len);
3346                 if (ret < 0)
3347                         goto out;
3348                 if (RB_EMPTY_ROOT(&rec->holes))
3349                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3350                 node = rb_first(&rec->holes);
3351         }
3352         /* special case for a file losing all its file extent */
3353         if (!found) {
3354                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3355                                        round_up(rec->isize,
3356                                                 root->fs_info->sectorsize));
3357                 if (ret < 0)
3358                         goto out;
3359         }
3360         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3361                rec->ino, root->objectid);
3362 out:
3363         return ret;
3364 }
3365
3366 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3367 {
3368         struct btrfs_trans_handle *trans;
3369         struct btrfs_path path;
3370         int ret = 0;
3371
3372         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3373                              I_ERR_NO_ORPHAN_ITEM |
3374                              I_ERR_LINK_COUNT_WRONG |
3375                              I_ERR_NO_INODE_ITEM |
3376                              I_ERR_FILE_EXTENT_ORPHAN |
3377                              I_ERR_FILE_EXTENT_DISCOUNT|
3378                              I_ERR_FILE_NBYTES_WRONG)))
3379                 return rec->errors;
3380
3381         /*
3382          * For nlink repair, it may create a dir and add link, so
3383          * 2 for parent(256)'s dir_index and dir_item
3384          * 2 for lost+found dir's inode_item and inode_ref
3385          * 1 for the new inode_ref of the file
3386          * 2 for lost+found dir's dir_index and dir_item for the file
3387          */
3388         trans = btrfs_start_transaction(root, 7);
3389         if (IS_ERR(trans))
3390                 return PTR_ERR(trans);
3391
3392         btrfs_init_path(&path);
3393         if (rec->errors & I_ERR_NO_INODE_ITEM)
3394                 ret = repair_inode_no_item(trans, root, &path, rec);
3395         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3396                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3397         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3398                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3399         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3400                 ret = repair_inode_isize(trans, root, &path, rec);
3401         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3402                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3403         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3404                 ret = repair_inode_nlinks(trans, root, &path, rec);
3405         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3406                 ret = repair_inode_nbytes(trans, root, &path, rec);
3407         btrfs_commit_transaction(trans, root);
3408         btrfs_release_path(&path);
3409         return ret;
3410 }
3411
3412 static int check_inode_recs(struct btrfs_root *root,
3413                             struct cache_tree *inode_cache)
3414 {
3415         struct cache_extent *cache;
3416         struct ptr_node *node;
3417         struct inode_record *rec;
3418         struct inode_backref *backref;
3419         int stage = 0;
3420         int ret = 0;
3421         int err = 0;
3422         u64 error = 0;
3423         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3424
3425         if (btrfs_root_refs(&root->root_item) == 0) {
3426                 if (!cache_tree_empty(inode_cache))
3427                         fprintf(stderr, "warning line %d\n", __LINE__);
3428                 return 0;
3429         }
3430
3431         /*
3432          * We need to repair backrefs first because we could change some of the
3433          * errors in the inode recs.
3434          *
3435          * We also need to go through and delete invalid backrefs first and then
3436          * add the correct ones second.  We do this because we may get EEXIST
3437          * when adding back the correct index because we hadn't yet deleted the
3438          * invalid index.
3439          *
3440          * For example, if we were missing a dir index then the directories
3441          * isize would be wrong, so if we fixed the isize to what we thought it
3442          * would be and then fixed the backref we'd still have a invalid fs, so
3443          * we need to add back the dir index and then check to see if the isize
3444          * is still wrong.
3445          */
3446         while (stage < 3) {
3447                 stage++;
3448                 if (stage == 3 && !err)
3449                         break;
3450
3451                 cache = search_cache_extent(inode_cache, 0);
3452                 while (repair && cache) {
3453                         node = container_of(cache, struct ptr_node, cache);
3454                         rec = node->data;
3455                         cache = next_cache_extent(cache);
3456
3457                         /* Need to free everything up and rescan */
3458                         if (stage == 3) {
3459                                 remove_cache_extent(inode_cache, &node->cache);
3460                                 free(node);
3461                                 free_inode_rec(rec);
3462                                 continue;
3463                         }
3464
3465                         if (list_empty(&rec->backrefs))
3466                                 continue;
3467
3468                         ret = repair_inode_backrefs(root, rec, inode_cache,
3469                                                     stage == 1);
3470                         if (ret < 0) {
3471                                 err = ret;
3472                                 stage = 2;
3473                                 break;
3474                         } if (ret > 0) {
3475                                 err = -EAGAIN;
3476                         }
3477                 }
3478         }
3479         if (err)
3480                 return err;
3481
3482         rec = get_inode_rec(inode_cache, root_dirid, 0);
3483         BUG_ON(IS_ERR(rec));
3484         if (rec) {
3485                 ret = check_root_dir(rec);
3486                 if (ret) {
3487                         fprintf(stderr, "root %llu root dir %llu error\n",
3488                                 (unsigned long long)root->root_key.objectid,
3489                                 (unsigned long long)root_dirid);
3490                         print_inode_error(root, rec);
3491                         error++;
3492                 }
3493         } else {
3494                 if (repair) {
3495                         struct btrfs_trans_handle *trans;
3496
3497                         trans = btrfs_start_transaction(root, 1);
3498                         if (IS_ERR(trans)) {
3499                                 err = PTR_ERR(trans);
3500                                 return err;
3501                         }
3502
3503                         fprintf(stderr,
3504                                 "root %llu missing its root dir, recreating\n",
3505                                 (unsigned long long)root->objectid);
3506
3507                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3508                         BUG_ON(ret);
3509
3510                         btrfs_commit_transaction(trans, root);
3511                         return -EAGAIN;
3512                 }
3513
3514                 fprintf(stderr, "root %llu root dir %llu not found\n",
3515                         (unsigned long long)root->root_key.objectid,
3516                         (unsigned long long)root_dirid);
3517         }
3518
3519         while (1) {
3520                 cache = search_cache_extent(inode_cache, 0);
3521                 if (!cache)
3522                         break;
3523                 node = container_of(cache, struct ptr_node, cache);
3524                 rec = node->data;
3525                 remove_cache_extent(inode_cache, &node->cache);
3526                 free(node);
3527                 if (rec->ino == root_dirid ||
3528                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3529                         free_inode_rec(rec);
3530                         continue;
3531                 }
3532
3533                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3534                         ret = check_orphan_item(root, rec->ino);
3535                         if (ret == 0)
3536                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3537                         if (can_free_inode_rec(rec)) {
3538                                 free_inode_rec(rec);
3539                                 continue;
3540                         }
3541                 }
3542
3543                 if (!rec->found_inode_item)
3544                         rec->errors |= I_ERR_NO_INODE_ITEM;
3545                 if (rec->found_link != rec->nlink)
3546                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3547                 if (repair) {
3548                         ret = try_repair_inode(root, rec);
3549                         if (ret == 0 && can_free_inode_rec(rec)) {
3550                                 free_inode_rec(rec);
3551                                 continue;
3552                         }
3553                         ret = 0;
3554                 }
3555
3556                 if (!(repair && ret == 0))
3557                         error++;
3558                 print_inode_error(root, rec);
3559                 list_for_each_entry(backref, &rec->backrefs, list) {
3560                         if (!backref->found_dir_item)
3561                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3562                         if (!backref->found_dir_index)
3563                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3564                         if (!backref->found_inode_ref)
3565                                 backref->errors |= REF_ERR_NO_INODE_REF;
3566                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3567                                 " namelen %u name %s filetype %d errors %x",
3568                                 (unsigned long long)backref->dir,
3569                                 (unsigned long long)backref->index,
3570                                 backref->namelen, backref->name,
3571                                 backref->filetype, backref->errors);
3572                         print_ref_error(backref->errors);
3573                 }
3574                 free_inode_rec(rec);
3575         }
3576         return (error > 0) ? -1 : 0;
3577 }
3578
3579 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3580                                         u64 objectid)
3581 {
3582         struct cache_extent *cache;
3583         struct root_record *rec = NULL;
3584         int ret;
3585
3586         cache = lookup_cache_extent(root_cache, objectid, 1);
3587         if (cache) {
3588                 rec = container_of(cache, struct root_record, cache);
3589         } else {
3590                 rec = calloc(1, sizeof(*rec));
3591                 if (!rec)
3592                         return ERR_PTR(-ENOMEM);
3593                 rec->objectid = objectid;
3594                 INIT_LIST_HEAD(&rec->backrefs);
3595                 rec->cache.start = objectid;
3596                 rec->cache.size = 1;
3597
3598                 ret = insert_cache_extent(root_cache, &rec->cache);
3599                 if (ret)
3600                         return ERR_PTR(-EEXIST);
3601         }
3602         return rec;
3603 }
3604
3605 static struct root_backref *get_root_backref(struct root_record *rec,
3606                                              u64 ref_root, u64 dir, u64 index,
3607                                              const char *name, int namelen)
3608 {
3609         struct root_backref *backref;
3610
3611         list_for_each_entry(backref, &rec->backrefs, list) {
3612                 if (backref->ref_root != ref_root || backref->dir != dir ||
3613                     backref->namelen != namelen)
3614                         continue;
3615                 if (memcmp(name, backref->name, namelen))
3616                         continue;
3617                 return backref;
3618         }
3619
3620         backref = calloc(1, sizeof(*backref) + namelen + 1);
3621         if (!backref)
3622                 return NULL;
3623         backref->ref_root = ref_root;
3624         backref->dir = dir;
3625         backref->index = index;
3626         backref->namelen = namelen;
3627         memcpy(backref->name, name, namelen);
3628         backref->name[namelen] = '\0';
3629         list_add_tail(&backref->list, &rec->backrefs);
3630         return backref;
3631 }
3632
3633 static void free_root_record(struct cache_extent *cache)
3634 {
3635         struct root_record *rec;
3636         struct root_backref *backref;
3637
3638         rec = container_of(cache, struct root_record, cache);
3639         while (!list_empty(&rec->backrefs)) {
3640                 backref = to_root_backref(rec->backrefs.next);
3641                 list_del(&backref->list);
3642                 free(backref);
3643         }
3644
3645         free(rec);
3646 }
3647
3648 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3649
3650 static int add_root_backref(struct cache_tree *root_cache,
3651                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3652                             const char *name, int namelen,
3653                             int item_type, int errors)
3654 {
3655         struct root_record *rec;
3656         struct root_backref *backref;
3657
3658         rec = get_root_rec(root_cache, root_id);
3659         BUG_ON(IS_ERR(rec));
3660         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3661         BUG_ON(!backref);
3662
3663         backref->errors |= errors;
3664
3665         if (item_type != BTRFS_DIR_ITEM_KEY) {
3666                 if (backref->found_dir_index || backref->found_back_ref ||
3667                     backref->found_forward_ref) {
3668                         if (backref->index != index)
3669                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3670                 } else {
3671                         backref->index = index;
3672                 }
3673         }
3674
3675         if (item_type == BTRFS_DIR_ITEM_KEY) {
3676                 if (backref->found_forward_ref)
3677                         rec->found_ref++;
3678                 backref->found_dir_item = 1;
3679         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3680                 backref->found_dir_index = 1;
3681         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3682                 if (backref->found_forward_ref)
3683                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3684                 else if (backref->found_dir_item)
3685                         rec->found_ref++;
3686                 backref->found_forward_ref = 1;
3687         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3688                 if (backref->found_back_ref)
3689                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3690                 backref->found_back_ref = 1;
3691         } else {
3692                 BUG_ON(1);
3693         }
3694
3695         if (backref->found_forward_ref && backref->found_dir_item)
3696                 backref->reachable = 1;
3697         return 0;
3698 }
3699
3700 static int merge_root_recs(struct btrfs_root *root,
3701                            struct cache_tree *src_cache,
3702                            struct cache_tree *dst_cache)
3703 {
3704         struct cache_extent *cache;
3705         struct ptr_node *node;
3706         struct inode_record *rec;
3707         struct inode_backref *backref;
3708         int ret = 0;
3709
3710         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3711                 free_inode_recs_tree(src_cache);
3712                 return 0;
3713         }
3714
3715         while (1) {
3716                 cache = search_cache_extent(src_cache, 0);
3717                 if (!cache)
3718                         break;
3719                 node = container_of(cache, struct ptr_node, cache);
3720                 rec = node->data;
3721                 remove_cache_extent(src_cache, &node->cache);
3722                 free(node);
3723
3724                 ret = is_child_root(root, root->objectid, rec->ino);
3725                 if (ret < 0)
3726                         break;
3727                 else if (ret == 0)
3728                         goto skip;
3729
3730                 list_for_each_entry(backref, &rec->backrefs, list) {
3731                         BUG_ON(backref->found_inode_ref);
3732                         if (backref->found_dir_item)
3733                                 add_root_backref(dst_cache, rec->ino,
3734                                         root->root_key.objectid, backref->dir,
3735                                         backref->index, backref->name,
3736                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3737                                         backref->errors);
3738                         if (backref->found_dir_index)
3739                                 add_root_backref(dst_cache, rec->ino,
3740                                         root->root_key.objectid, backref->dir,
3741                                         backref->index, backref->name,
3742                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3743                                         backref->errors);
3744                 }
3745 skip:
3746                 free_inode_rec(rec);
3747         }
3748         if (ret < 0)
3749                 return ret;
3750         return 0;
3751 }
3752
3753 static int check_root_refs(struct btrfs_root *root,
3754                            struct cache_tree *root_cache)
3755 {
3756         struct root_record *rec;
3757         struct root_record *ref_root;
3758         struct root_backref *backref;
3759         struct cache_extent *cache;
3760         int loop = 1;
3761         int ret;
3762         int error;
3763         int errors = 0;
3764
3765         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3766         BUG_ON(IS_ERR(rec));
3767         rec->found_ref = 1;
3768
3769         /* fixme: this can not detect circular references */
3770         while (loop) {
3771                 loop = 0;
3772                 cache = search_cache_extent(root_cache, 0);
3773                 while (1) {
3774                         if (!cache)
3775                                 break;
3776                         rec = container_of(cache, struct root_record, cache);
3777                         cache = next_cache_extent(cache);
3778
3779                         if (rec->found_ref == 0)
3780                                 continue;
3781
3782                         list_for_each_entry(backref, &rec->backrefs, list) {
3783                                 if (!backref->reachable)
3784                                         continue;
3785
3786                                 ref_root = get_root_rec(root_cache,
3787                                                         backref->ref_root);
3788                                 BUG_ON(IS_ERR(ref_root));
3789                                 if (ref_root->found_ref > 0)
3790                                         continue;
3791
3792                                 backref->reachable = 0;
3793                                 rec->found_ref--;
3794                                 if (rec->found_ref == 0)
3795                                         loop = 1;
3796                         }
3797                 }
3798         }
3799
3800         cache = search_cache_extent(root_cache, 0);
3801         while (1) {
3802                 if (!cache)
3803                         break;
3804                 rec = container_of(cache, struct root_record, cache);
3805                 cache = next_cache_extent(cache);
3806
3807                 if (rec->found_ref == 0 &&
3808                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3809                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3810                         ret = check_orphan_item(root->fs_info->tree_root,
3811                                                 rec->objectid);
3812                         if (ret == 0)
3813                                 continue;
3814
3815                         /*
3816                          * If we don't have a root item then we likely just have
3817                          * a dir item in a snapshot for this root but no actual
3818                          * ref key or anything so it's meaningless.
3819                          */
3820                         if (!rec->found_root_item)
3821                                 continue;
3822                         errors++;
3823                         fprintf(stderr, "fs tree %llu not referenced\n",
3824                                 (unsigned long long)rec->objectid);
3825                 }
3826
3827                 error = 0;
3828                 if (rec->found_ref > 0 && !rec->found_root_item)
3829                         error = 1;
3830                 list_for_each_entry(backref, &rec->backrefs, list) {
3831                         if (!backref->found_dir_item)
3832                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3833                         if (!backref->found_dir_index)
3834                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3835                         if (!backref->found_back_ref)
3836                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3837                         if (!backref->found_forward_ref)
3838                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3839                         if (backref->reachable && backref->errors)
3840                                 error = 1;
3841                 }
3842                 if (!error)
3843                         continue;
3844
3845                 errors++;
3846                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3847                         (unsigned long long)rec->objectid, rec->found_ref,
3848                          rec->found_root_item ? "" : "not found");
3849
3850                 list_for_each_entry(backref, &rec->backrefs, list) {
3851                         if (!backref->reachable)
3852                                 continue;
3853                         if (!backref->errors && rec->found_root_item)
3854                                 continue;
3855                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3856                                 " index %llu namelen %u name %s errors %x\n",
3857                                 (unsigned long long)backref->ref_root,
3858                                 (unsigned long long)backref->dir,
3859                                 (unsigned long long)backref->index,
3860                                 backref->namelen, backref->name,
3861                                 backref->errors);
3862                         print_ref_error(backref->errors);
3863                 }
3864         }
3865         return errors > 0 ? 1 : 0;
3866 }
3867
3868 static int process_root_ref(struct extent_buffer *eb, int slot,
3869                             struct btrfs_key *key,
3870                             struct cache_tree *root_cache)
3871 {
3872         u64 dirid;
3873         u64 index;
3874         u32 len;
3875         u32 name_len;
3876         struct btrfs_root_ref *ref;
3877         char namebuf[BTRFS_NAME_LEN];
3878         int error;
3879
3880         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3881
3882         dirid = btrfs_root_ref_dirid(eb, ref);
3883         index = btrfs_root_ref_sequence(eb, ref);
3884         name_len = btrfs_root_ref_name_len(eb, ref);
3885
3886         if (name_len <= BTRFS_NAME_LEN) {
3887                 len = name_len;
3888                 error = 0;
3889         } else {
3890                 len = BTRFS_NAME_LEN;
3891                 error = REF_ERR_NAME_TOO_LONG;
3892         }
3893         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3894
3895         if (key->type == BTRFS_ROOT_REF_KEY) {
3896                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3897                                  index, namebuf, len, key->type, error);
3898         } else {
3899                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3900                                  index, namebuf, len, key->type, error);
3901         }
3902         return 0;
3903 }
3904
3905 static void free_corrupt_block(struct cache_extent *cache)
3906 {
3907         struct btrfs_corrupt_block *corrupt;
3908
3909         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3910         free(corrupt);
3911 }
3912
3913 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3914
3915 /*
3916  * Repair the btree of the given root.
3917  *
3918  * The fix is to remove the node key in corrupt_blocks cache_tree.
3919  * and rebalance the tree.
3920  * After the fix, the btree should be writeable.
3921  */
3922 static int repair_btree(struct btrfs_root *root,
3923                         struct cache_tree *corrupt_blocks)
3924 {
3925         struct btrfs_trans_handle *trans;
3926         struct btrfs_path path;
3927         struct btrfs_corrupt_block *corrupt;
3928         struct cache_extent *cache;
3929         struct btrfs_key key;
3930         u64 offset;
3931         int level;
3932         int ret = 0;
3933
3934         if (cache_tree_empty(corrupt_blocks))
3935                 return 0;
3936
3937         trans = btrfs_start_transaction(root, 1);
3938         if (IS_ERR(trans)) {
3939                 ret = PTR_ERR(trans);
3940                 fprintf(stderr, "Error starting transaction: %s\n",
3941                         strerror(-ret));
3942                 return ret;
3943         }
3944         btrfs_init_path(&path);
3945         cache = first_cache_extent(corrupt_blocks);
3946         while (cache) {
3947                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3948                                        cache);
3949                 level = corrupt->level;
3950                 path.lowest_level = level;
3951                 key.objectid = corrupt->key.objectid;
3952                 key.type = corrupt->key.type;
3953                 key.offset = corrupt->key.offset;
3954
3955                 /*
3956                  * Here we don't want to do any tree balance, since it may
3957                  * cause a balance with corrupted brother leaf/node,
3958                  * so ins_len set to 0 here.
3959                  * Balance will be done after all corrupt node/leaf is deleted.
3960                  */
3961                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3962                 if (ret < 0)
3963                         goto out;
3964                 offset = btrfs_node_blockptr(path.nodes[level],
3965                                              path.slots[level]);
3966
3967                 /* Remove the ptr */
3968                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3969                 if (ret < 0)
3970                         goto out;
3971                 /*
3972                  * Remove the corresponding extent
3973                  * return value is not concerned.
3974                  */
3975                 btrfs_release_path(&path);
3976                 ret = btrfs_free_extent(trans, root, offset,
3977                                 root->fs_info->nodesize, 0,
3978                                 root->root_key.objectid, level - 1, 0);
3979                 cache = next_cache_extent(cache);
3980         }
3981
3982         /* Balance the btree using btrfs_search_slot() */
3983         cache = first_cache_extent(corrupt_blocks);
3984         while (cache) {
3985                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3986                                        cache);
3987                 memcpy(&key, &corrupt->key, sizeof(key));
3988                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3989                 if (ret < 0)
3990                         goto out;
3991                 /* return will always >0 since it won't find the item */
3992                 ret = 0;
3993                 btrfs_release_path(&path);
3994                 cache = next_cache_extent(cache);
3995         }
3996 out:
3997         btrfs_commit_transaction(trans, root);
3998         btrfs_release_path(&path);
3999         return ret;
4000 }
4001
4002 static int check_fs_root(struct btrfs_root *root,
4003                          struct cache_tree *root_cache,
4004                          struct walk_control *wc)
4005 {
4006         int ret = 0;
4007         int err = 0;
4008         int wret;
4009         int level;
4010         struct btrfs_path path;
4011         struct shared_node root_node;
4012         struct root_record *rec;
4013         struct btrfs_root_item *root_item = &root->root_item;
4014         struct cache_tree corrupt_blocks;
4015         struct orphan_data_extent *orphan;
4016         struct orphan_data_extent *tmp;
4017         enum btrfs_tree_block_status status;
4018         struct node_refs nrefs;
4019
4020         /*
4021          * Reuse the corrupt_block cache tree to record corrupted tree block
4022          *
4023          * Unlike the usage in extent tree check, here we do it in a per
4024          * fs/subvol tree base.
4025          */
4026         cache_tree_init(&corrupt_blocks);
4027         root->fs_info->corrupt_blocks = &corrupt_blocks;
4028
4029         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4030                 rec = get_root_rec(root_cache, root->root_key.objectid);
4031                 BUG_ON(IS_ERR(rec));
4032                 if (btrfs_root_refs(root_item) > 0)
4033                         rec->found_root_item = 1;
4034         }
4035
4036         btrfs_init_path(&path);
4037         memset(&root_node, 0, sizeof(root_node));
4038         cache_tree_init(&root_node.root_cache);
4039         cache_tree_init(&root_node.inode_cache);
4040         memset(&nrefs, 0, sizeof(nrefs));
4041
4042         /* Move the orphan extent record to corresponding inode_record */
4043         list_for_each_entry_safe(orphan, tmp,
4044                                  &root->orphan_data_extents, list) {
4045                 struct inode_record *inode;
4046
4047                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4048                                       1);
4049                 BUG_ON(IS_ERR(inode));
4050                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4051                 list_move(&orphan->list, &inode->orphan_extents);
4052         }
4053
4054         level = btrfs_header_level(root->node);
4055         memset(wc->nodes, 0, sizeof(wc->nodes));
4056         wc->nodes[level] = &root_node;
4057         wc->active_node = level;
4058         wc->root_level = level;
4059
4060         /* We may not have checked the root block, lets do that now */
4061         if (btrfs_is_leaf(root->node))
4062                 status = btrfs_check_leaf(root, NULL, root->node);
4063         else
4064                 status = btrfs_check_node(root, NULL, root->node);
4065         if (status != BTRFS_TREE_BLOCK_CLEAN)
4066                 return -EIO;
4067
4068         if (btrfs_root_refs(root_item) > 0 ||
4069             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4070                 path.nodes[level] = root->node;
4071                 extent_buffer_get(root->node);
4072                 path.slots[level] = 0;
4073         } else {
4074                 struct btrfs_key key;
4075                 struct btrfs_disk_key found_key;
4076
4077                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4078                 level = root_item->drop_level;
4079                 path.lowest_level = level;
4080                 if (level > btrfs_header_level(root->node) ||
4081                     level >= BTRFS_MAX_LEVEL) {
4082                         error("ignoring invalid drop level: %u", level);
4083                         goto skip_walking;
4084                 }
4085                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4086                 if (wret < 0)
4087                         goto skip_walking;
4088                 btrfs_node_key(path.nodes[level], &found_key,
4089                                 path.slots[level]);
4090                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4091                                         sizeof(found_key)));
4092         }
4093
4094         while (1) {
4095                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4096                 if (wret < 0)
4097                         ret = wret;
4098                 if (wret != 0)
4099                         break;
4100
4101                 wret = walk_up_tree(root, &path, wc, &level);
4102                 if (wret < 0)
4103                         ret = wret;
4104                 if (wret != 0)
4105                         break;
4106         }
4107 skip_walking:
4108         btrfs_release_path(&path);
4109
4110         if (!cache_tree_empty(&corrupt_blocks)) {
4111                 struct cache_extent *cache;
4112                 struct btrfs_corrupt_block *corrupt;
4113
4114                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4115                        root->root_key.objectid);
4116                 cache = first_cache_extent(&corrupt_blocks);
4117                 while (cache) {
4118                         corrupt = container_of(cache,
4119                                                struct btrfs_corrupt_block,
4120                                                cache);
4121                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4122                                cache->start, corrupt->level,
4123                                corrupt->key.objectid, corrupt->key.type,
4124                                corrupt->key.offset);
4125                         cache = next_cache_extent(cache);
4126                 }
4127                 if (repair) {
4128                         printf("Try to repair the btree for root %llu\n",
4129                                root->root_key.objectid);
4130                         ret = repair_btree(root, &corrupt_blocks);
4131                         if (ret < 0)
4132                                 fprintf(stderr, "Failed to repair btree: %s\n",
4133                                         strerror(-ret));
4134                         if (!ret)
4135                                 printf("Btree for root %llu is fixed\n",
4136                                        root->root_key.objectid);
4137                 }
4138         }
4139
4140         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4141         if (err < 0)
4142                 ret = err;
4143
4144         if (root_node.current) {
4145                 root_node.current->checked = 1;
4146                 maybe_free_inode_rec(&root_node.inode_cache,
4147                                 root_node.current);
4148         }
4149
4150         err = check_inode_recs(root, &root_node.inode_cache);
4151         if (!ret)
4152                 ret = err;
4153
4154         free_corrupt_blocks_tree(&corrupt_blocks);
4155         root->fs_info->corrupt_blocks = NULL;
4156         free_orphan_data_extents(&root->orphan_data_extents);
4157         return ret;
4158 }
4159
4160 static int fs_root_objectid(u64 objectid)
4161 {
4162         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4163             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4164                 return 1;
4165         return is_fstree(objectid);
4166 }
4167
4168 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4169                           struct cache_tree *root_cache)
4170 {
4171         struct btrfs_path path;
4172         struct btrfs_key key;
4173         struct walk_control wc;
4174         struct extent_buffer *leaf, *tree_node;
4175         struct btrfs_root *tmp_root;
4176         struct btrfs_root *tree_root = fs_info->tree_root;
4177         int ret;
4178         int err = 0;
4179
4180         if (ctx.progress_enabled) {
4181                 ctx.tp = TASK_FS_ROOTS;
4182                 task_start(ctx.info);
4183         }
4184
4185         /*
4186          * Just in case we made any changes to the extent tree that weren't
4187          * reflected into the free space cache yet.
4188          */
4189         if (repair)
4190                 reset_cached_block_groups(fs_info);
4191         memset(&wc, 0, sizeof(wc));
4192         cache_tree_init(&wc.shared);
4193         btrfs_init_path(&path);
4194
4195 again:
4196         key.offset = 0;
4197         key.objectid = 0;
4198         key.type = BTRFS_ROOT_ITEM_KEY;
4199         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4200         if (ret < 0) {
4201                 err = 1;
4202                 goto out;
4203         }
4204         tree_node = tree_root->node;
4205         while (1) {
4206                 if (tree_node != tree_root->node) {
4207                         free_root_recs_tree(root_cache);
4208                         btrfs_release_path(&path);
4209                         goto again;
4210                 }
4211                 leaf = path.nodes[0];
4212                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4213                         ret = btrfs_next_leaf(tree_root, &path);
4214                         if (ret) {
4215                                 if (ret < 0)
4216                                         err = 1;
4217                                 break;
4218                         }
4219                         leaf = path.nodes[0];
4220                 }
4221                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4222                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4223                     fs_root_objectid(key.objectid)) {
4224                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4225                                 tmp_root = btrfs_read_fs_root_no_cache(
4226                                                 fs_info, &key);
4227                         } else {
4228                                 key.offset = (u64)-1;
4229                                 tmp_root = btrfs_read_fs_root(
4230                                                 fs_info, &key);
4231                         }
4232                         if (IS_ERR(tmp_root)) {
4233                                 err = 1;
4234                                 goto next;
4235                         }
4236                         ret = check_fs_root(tmp_root, root_cache, &wc);
4237                         if (ret == -EAGAIN) {
4238                                 free_root_recs_tree(root_cache);
4239                                 btrfs_release_path(&path);
4240                                 goto again;
4241                         }
4242                         if (ret)
4243                                 err = 1;
4244                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4245                                 btrfs_free_fs_root(tmp_root);
4246                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4247                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4248                         process_root_ref(leaf, path.slots[0], &key,
4249                                          root_cache);
4250                 }
4251 next:
4252                 path.slots[0]++;
4253         }
4254 out:
4255         btrfs_release_path(&path);
4256         if (err)
4257                 free_extent_cache_tree(&wc.shared);
4258         if (!cache_tree_empty(&wc.shared))
4259                 fprintf(stderr, "warning line %d\n", __LINE__);
4260
4261         task_stop(ctx.info);
4262
4263         return err;
4264 }
4265
4266 /*
4267  * Find the @index according by @ino and name.
4268  * Notice:time efficiency is O(N)
4269  *
4270  * @root:       the root of the fs/file tree
4271  * @index_ret:  the index as return value
4272  * @namebuf:    the name to match
4273  * @name_len:   the length of name to match
4274  * @file_type:  the file_type of INODE_ITEM to match
4275  *
4276  * Returns 0 if found and *@index_ret will be modified with right value
4277  * Returns< 0 not found and *@index_ret will be (u64)-1
4278  */
4279 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4280                           u64 *index_ret, char *namebuf, u32 name_len,
4281                           u8 file_type)
4282 {
4283         struct btrfs_path path;
4284         struct extent_buffer *node;
4285         struct btrfs_dir_item *di;
4286         struct btrfs_key key;
4287         struct btrfs_key location;
4288         char name[BTRFS_NAME_LEN] = {0};
4289
4290         u32 total;
4291         u32 cur = 0;
4292         u32 len;
4293         u32 data_len;
4294         u8 filetype;
4295         int slot;
4296         int ret;
4297
4298         ASSERT(index_ret);
4299
4300         /* search from the last index */
4301         key.objectid = dirid;
4302         key.offset = (u64)-1;
4303         key.type = BTRFS_DIR_INDEX_KEY;
4304
4305         btrfs_init_path(&path);
4306         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4307         if (ret < 0)
4308                 return ret;
4309
4310 loop:
4311         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4312         if (ret) {
4313                 ret = -ENOENT;
4314                 *index_ret = (64)-1;
4315                 goto out;
4316         }
4317         /* Check whether inode_id/filetype/name match */
4318         node = path.nodes[0];
4319         slot = path.slots[0];
4320         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4321         total = btrfs_item_size_nr(node, slot);
4322         while (cur < total) {
4323                 ret = -ENOENT;
4324                 len = btrfs_dir_name_len(node, di);
4325                 data_len = btrfs_dir_data_len(node, di);
4326
4327                 btrfs_dir_item_key_to_cpu(node, di, &location);
4328                 if (location.objectid != location_id ||
4329                     location.type != BTRFS_INODE_ITEM_KEY ||
4330                     location.offset != 0)
4331                         goto next;
4332
4333                 filetype = btrfs_dir_type(node, di);
4334                 if (file_type != filetype)
4335                         goto next;
4336
4337                 if (len > BTRFS_NAME_LEN)
4338                         len = BTRFS_NAME_LEN;
4339
4340                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4341                 if (len != name_len || strncmp(namebuf, name, len))
4342                         goto next;
4343
4344                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4345                 *index_ret = key.offset;
4346                 ret = 0;
4347                 goto out;
4348 next:
4349                 len += sizeof(*di) + data_len;
4350                 di = (struct btrfs_dir_item *)((char *)di + len);
4351                 cur += len;
4352         }
4353         goto loop;
4354
4355 out:
4356         btrfs_release_path(&path);
4357         return ret;
4358 }
4359
4360 /*
4361  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4362  * INODE_REF/INODE_EXTREF match.
4363  *
4364  * @root:       the root of the fs/file tree
4365  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4366  *              value while find index
4367  * @location_key: location key of the struct btrfs_dir_item to match
4368  * @name:       the name to match
4369  * @namelen:    the length of name
4370  * @file_type:  the type of file to math
4371  *
4372  * Return 0 if no error occurred.
4373  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4374  * DIR_ITEM/DIR_INDEX
4375  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4376  * and DIR_ITEM/DIR_INDEX mismatch
4377  */
4378 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4379                          struct btrfs_key *location_key, char *name,
4380                          u32 namelen, u8 file_type)
4381 {
4382         struct btrfs_path path;
4383         struct extent_buffer *node;
4384         struct btrfs_dir_item *di;
4385         struct btrfs_key location;
4386         char namebuf[BTRFS_NAME_LEN] = {0};
4387         u32 total;
4388         u32 cur = 0;
4389         u32 len;
4390         u32 data_len;
4391         u8 filetype;
4392         int slot;
4393         int ret;
4394
4395         /* get the index by traversing all index */
4396         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4397                 ret = find_dir_index(root, key->objectid,
4398                                      location_key->objectid, &key->offset,
4399                                      name, namelen, file_type);
4400                 if (ret)
4401                         ret = DIR_INDEX_MISSING;
4402                 return ret;
4403         }
4404
4405         btrfs_init_path(&path);
4406         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4407         if (ret) {
4408                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4409                         DIR_INDEX_MISSING;
4410                 goto out;
4411         }
4412
4413         /* Check whether inode_id/filetype/name match */
4414         node = path.nodes[0];
4415         slot = path.slots[0];
4416         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4417         total = btrfs_item_size_nr(node, slot);
4418         while (cur < total) {
4419                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4420                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4421
4422                 len = btrfs_dir_name_len(node, di);
4423                 data_len = btrfs_dir_data_len(node, di);
4424
4425                 btrfs_dir_item_key_to_cpu(node, di, &location);
4426                 if (location.objectid != location_key->objectid ||
4427                     location.type != location_key->type ||
4428                     location.offset != location_key->offset)
4429                         goto next;
4430
4431                 filetype = btrfs_dir_type(node, di);
4432                 if (file_type != filetype)
4433                         goto next;
4434
4435                 if (len > BTRFS_NAME_LEN) {
4436                         len = BTRFS_NAME_LEN;
4437                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4438                         root->objectid,
4439                         key->type == BTRFS_DIR_ITEM_KEY ?
4440                         "DIR_ITEM" : "DIR_INDEX",
4441                         key->objectid, key->offset, len);
4442                 }
4443                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4444                                    len);
4445                 if (len != namelen || strncmp(namebuf, name, len))
4446                         goto next;
4447
4448                 ret = 0;
4449                 goto out;
4450 next:
4451                 len += sizeof(*di) + data_len;
4452                 di = (struct btrfs_dir_item *)((char *)di + len);
4453                 cur += len;
4454         }
4455
4456 out:
4457         btrfs_release_path(&path);
4458         return ret;
4459 }
4460
4461 /*
4462  * Prints inode ref error message
4463  */
4464 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4465                                 u64 index, const char *namebuf, int name_len,
4466                                 u8 filetype, int err)
4467 {
4468         if (!err)
4469                 return;
4470
4471         /* root dir error */
4472         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4473                 error(
4474         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4475                       root->objectid, key->objectid, key->offset, namebuf);
4476                 return;
4477         }
4478
4479         /* normal error */
4480         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4481                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4482                       root->objectid, key->offset,
4483                       btrfs_name_hash(namebuf, name_len),
4484                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4485                       namebuf, filetype);
4486         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4487                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4488                       root->objectid, key->offset, index,
4489                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4490                       namebuf, filetype);
4491 }
4492
4493 /*
4494  * Insert the missing inode item.
4495  *
4496  * Returns 0 means success.
4497  * Returns <0 means error.
4498  */
4499 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4500                                      u8 filetype)
4501 {
4502         struct btrfs_key key;
4503         struct btrfs_trans_handle *trans;
4504         struct btrfs_path path;
4505         int ret;
4506
4507         key.objectid = ino;
4508         key.type = BTRFS_INODE_ITEM_KEY;
4509         key.offset = 0;
4510
4511         btrfs_init_path(&path);
4512         trans = btrfs_start_transaction(root, 1);
4513         if (IS_ERR(trans)) {
4514                 ret = -EIO;
4515                 goto out;
4516         }
4517
4518         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4519         if (ret < 0 || !ret)
4520                 goto fail;
4521
4522         /* insert inode item */
4523         create_inode_item_lowmem(trans, root, ino, filetype);
4524         ret = 0;
4525 fail:
4526         btrfs_commit_transaction(trans, root);
4527 out:
4528         if (ret)
4529                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4530                       root->objectid, ino);
4531         btrfs_release_path(&path);
4532         return ret;
4533 }
4534
4535 /*
4536  * Traverse the given INODE_REF and call find_dir_item() to find related
4537  * DIR_ITEM/DIR_INDEX.
4538  *
4539  * @root:       the root of the fs/file tree
4540  * @ref_key:    the key of the INODE_REF
4541  * @refs:       the count of INODE_REF
4542  * @mode:       the st_mode of INODE_ITEM
4543  *
4544  * Return 0 if no error occurred.
4545  */
4546 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4547                            struct btrfs_path *path, char *name_ret,
4548                            u32 *namelen_ret, u64 *refs, int mode)
4549 {
4550         struct btrfs_key key;
4551         struct btrfs_key location;
4552         struct btrfs_inode_ref *ref;
4553         struct extent_buffer *node;
4554         char namebuf[BTRFS_NAME_LEN] = {0};
4555         u32 total;
4556         u32 cur = 0;
4557         u32 len;
4558         u32 name_len;
4559         u64 index;
4560         int err = 0;
4561         int tmp_err;
4562         int slot;
4563
4564         location.objectid = ref_key->objectid;
4565         location.type = BTRFS_INODE_ITEM_KEY;
4566         location.offset = 0;
4567         node = path->nodes[0];
4568         slot = path->slots[0];
4569
4570         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4571         total = btrfs_item_size_nr(node, slot);
4572
4573 next:
4574         /* Update inode ref count */
4575         (*refs)++;
4576
4577         tmp_err = 0;
4578         index = btrfs_inode_ref_index(node, ref);
4579         name_len = btrfs_inode_ref_name_len(node, ref);
4580         if (cur + sizeof(*ref) + name_len > total ||
4581             name_len > BTRFS_NAME_LEN) {
4582                 warning("root %llu INODE_REF[%llu %llu] name too long",
4583                         root->objectid, ref_key->objectid, ref_key->offset);
4584
4585                 if (total < cur + sizeof(*ref))
4586                         goto out;
4587                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4588         } else {
4589                 len = name_len;
4590         }
4591
4592         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4593
4594         /* copy the fisrt name found to name_ret */
4595         if (*refs == 1 && name_ret) {
4596                 memcpy(name_ret, namebuf, len);
4597                 *namelen_ret = len;
4598         }
4599
4600         /* Check root dir ref */
4601         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4602                 if (index != 0 || len != strlen("..") ||
4603                     strncmp("..", namebuf, len) ||
4604                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4605                         /* set err bits then repair will delete the ref */
4606                         err |= DIR_INDEX_MISSING;
4607                         err |= DIR_ITEM_MISSING;
4608                 }
4609                 goto end;
4610         }
4611
4612         /* Find related DIR_INDEX */
4613         key.objectid = ref_key->offset;
4614         key.type = BTRFS_DIR_INDEX_KEY;
4615         key.offset = index;
4616         tmp_err |= find_dir_item(root, &key, &location, namebuf, len, mode);
4617
4618         /* Find related dir_item */
4619         key.objectid = ref_key->offset;
4620         key.type = BTRFS_DIR_ITEM_KEY;
4621         key.offset = btrfs_name_hash(namebuf, len);
4622         tmp_err |= find_dir_item(root, &key, &location, namebuf, len, mode);
4623
4624 end:
4625         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4626                             imode_to_type(mode), tmp_err);
4627         err |= tmp_err;
4628         len = sizeof(*ref) + name_len;
4629         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4630         cur += len;
4631         if (cur < total)
4632                 goto next;
4633
4634 out:
4635         return err;
4636 }
4637
4638 /*
4639  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4640  * DIR_ITEM/DIR_INDEX.
4641  *
4642  * @root:       the root of the fs/file tree
4643  * @ref_key:    the key of the INODE_EXTREF
4644  * @refs:       the count of INODE_EXTREF
4645  * @mode:       the st_mode of INODE_ITEM
4646  *
4647  * Return 0 if no error occurred.
4648  */
4649 static int check_inode_extref(struct btrfs_root *root,
4650                               struct btrfs_key *ref_key,
4651                               struct extent_buffer *node, int slot, u64 *refs,
4652                               int mode)
4653 {
4654         struct btrfs_key key;
4655         struct btrfs_key location;
4656         struct btrfs_inode_extref *extref;
4657         char namebuf[BTRFS_NAME_LEN] = {0};
4658         u32 total;
4659         u32 cur = 0;
4660         u32 len;
4661         u32 name_len;
4662         u64 index;
4663         u64 parent;
4664         int ret;
4665         int err = 0;
4666
4667         location.objectid = ref_key->objectid;
4668         location.type = BTRFS_INODE_ITEM_KEY;
4669         location.offset = 0;
4670
4671         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4672         total = btrfs_item_size_nr(node, slot);
4673
4674 next:
4675         /* update inode ref count */
4676         (*refs)++;
4677         name_len = btrfs_inode_extref_name_len(node, extref);
4678         index = btrfs_inode_extref_index(node, extref);
4679         parent = btrfs_inode_extref_parent(node, extref);
4680         if (name_len <= BTRFS_NAME_LEN) {
4681                 len = name_len;
4682         } else {
4683                 len = BTRFS_NAME_LEN;
4684                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4685                         root->objectid, ref_key->objectid, ref_key->offset);
4686         }
4687         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4688
4689         /* Check root dir ref name */
4690         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4691                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4692                       root->objectid, ref_key->objectid, ref_key->offset,
4693                       namebuf);
4694                 err |= ROOT_DIR_ERROR;
4695         }
4696
4697         /* find related dir_index */
4698         key.objectid = parent;
4699         key.type = BTRFS_DIR_INDEX_KEY;
4700         key.offset = index;
4701         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4702         err |= ret;
4703
4704         /* find related dir_item */
4705         key.objectid = parent;
4706         key.type = BTRFS_DIR_ITEM_KEY;
4707         key.offset = btrfs_name_hash(namebuf, len);
4708         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4709         err |= ret;
4710
4711         len = sizeof(*extref) + name_len;
4712         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4713         cur += len;
4714
4715         if (cur < total)
4716                 goto next;
4717
4718         return err;
4719 }
4720
4721 /*
4722  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4723  * DIR_ITEM/DIR_INDEX match.
4724  * Return with @index_ret.
4725  *
4726  * @root:       the root of the fs/file tree
4727  * @key:        the key of the INODE_REF/INODE_EXTREF
4728  * @name:       the name in the INODE_REF/INODE_EXTREF
4729  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4730  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4731  *              value (64)-1 means do not check index
4732  * @ext_ref:    the EXTENDED_IREF feature
4733  *
4734  * Return 0 if no error occurred.
4735  * Return >0 for error bitmap
4736  */
4737 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4738                           char *name, int namelen, u64 *index_ret,
4739                           unsigned int ext_ref)
4740 {
4741         struct btrfs_path path;
4742         struct btrfs_inode_ref *ref;
4743         struct btrfs_inode_extref *extref;
4744         struct extent_buffer *node;
4745         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4746         u32 total;
4747         u32 cur = 0;
4748         u32 len;
4749         u32 ref_namelen;
4750         u64 ref_index;
4751         u64 parent;
4752         u64 dir_id;
4753         int slot;
4754         int ret;
4755
4756         ASSERT(index_ret);
4757
4758         btrfs_init_path(&path);
4759         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4760         if (ret) {
4761                 ret = INODE_REF_MISSING;
4762                 goto extref;
4763         }
4764
4765         node = path.nodes[0];
4766         slot = path.slots[0];
4767
4768         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4769         total = btrfs_item_size_nr(node, slot);
4770
4771         /* Iterate all entry of INODE_REF */
4772         while (cur < total) {
4773                 ret = INODE_REF_MISSING;
4774
4775                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4776                 ref_index = btrfs_inode_ref_index(node, ref);
4777                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4778                         goto next_ref;
4779
4780                 if (cur + sizeof(*ref) + ref_namelen > total ||
4781                     ref_namelen > BTRFS_NAME_LEN) {
4782                         warning("root %llu INODE %s[%llu %llu] name too long",
4783                                 root->objectid,
4784                                 key->type == BTRFS_INODE_REF_KEY ?
4785                                         "REF" : "EXTREF",
4786                                 key->objectid, key->offset);
4787
4788                         if (cur + sizeof(*ref) > total)
4789                                 break;
4790                         len = min_t(u32, total - cur - sizeof(*ref),
4791                                     BTRFS_NAME_LEN);
4792                 } else {
4793                         len = ref_namelen;
4794                 }
4795
4796                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4797                                    len);
4798
4799                 if (len != namelen || strncmp(ref_namebuf, name, len))
4800                         goto next_ref;
4801
4802                 *index_ret = ref_index;
4803                 ret = 0;
4804                 goto out;
4805 next_ref:
4806                 len = sizeof(*ref) + ref_namelen;
4807                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4808                 cur += len;
4809         }
4810
4811 extref:
4812         /* Skip if not support EXTENDED_IREF feature */
4813         if (!ext_ref)
4814                 goto out;
4815
4816         btrfs_release_path(&path);
4817         btrfs_init_path(&path);
4818
4819         dir_id = key->offset;
4820         key->type = BTRFS_INODE_EXTREF_KEY;
4821         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4822
4823         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4824         if (ret) {
4825                 ret = INODE_REF_MISSING;
4826                 goto out;
4827         }
4828
4829         node = path.nodes[0];
4830         slot = path.slots[0];
4831
4832         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4833         cur = 0;
4834         total = btrfs_item_size_nr(node, slot);
4835
4836         /* Iterate all entry of INODE_EXTREF */
4837         while (cur < total) {
4838                 ret = INODE_REF_MISSING;
4839
4840                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4841                 ref_index = btrfs_inode_extref_index(node, extref);
4842                 parent = btrfs_inode_extref_parent(node, extref);
4843                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4844                         goto next_extref;
4845
4846                 if (parent != dir_id)
4847                         goto next_extref;
4848
4849                 if (ref_namelen <= BTRFS_NAME_LEN) {
4850                         len = ref_namelen;
4851                 } else {
4852                         len = BTRFS_NAME_LEN;
4853                         warning("root %llu INODE %s[%llu %llu] name too long",
4854                                 root->objectid,
4855                                 key->type == BTRFS_INODE_REF_KEY ?
4856                                         "REF" : "EXTREF",
4857                                 key->objectid, key->offset);
4858                 }
4859                 read_extent_buffer(node, ref_namebuf,
4860                                    (unsigned long)(extref + 1), len);
4861
4862                 if (len != namelen || strncmp(ref_namebuf, name, len))
4863                         goto next_extref;
4864
4865                 *index_ret = ref_index;
4866                 ret = 0;
4867                 goto out;
4868
4869 next_extref:
4870                 len = sizeof(*extref) + ref_namelen;
4871                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4872                 cur += len;
4873
4874         }
4875 out:
4876         btrfs_release_path(&path);
4877         return ret;
4878 }
4879
4880 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4881                                u64 ino, u64 index, const char *namebuf,
4882                                int name_len, u8 filetype, int err)
4883 {
4884         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4885                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4886                       root->objectid, key->objectid, key->offset, namebuf,
4887                       filetype,
4888                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4889         }
4890
4891         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4892                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4893                       root->objectid, key->objectid, index, namebuf, filetype,
4894                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4895         }
4896
4897         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4898                 error(
4899                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4900                       root->objectid, ino, index, namebuf, filetype,
4901                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
4902         }
4903
4904         if (err & INODE_REF_MISSING)
4905                 error(
4906                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
4907                       root->objectid, ino, key->objectid, namebuf, filetype);
4908
4909 }
4910
4911 /*
4912  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4913  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4914  *
4915  * @root:       the root of the fs/file tree
4916  * @key:        the key of the INODE_REF/INODE_EXTREF
4917  * @path:       the path
4918  * @size:       the st_size of the INODE_ITEM
4919  * @ext_ref:    the EXTENDED_IREF feature
4920  *
4921  * Return 0 if no error occurred.
4922  */
4923 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
4924                           struct btrfs_path *path, u64 *size,
4925                           unsigned int ext_ref)
4926 {
4927         struct btrfs_dir_item *di;
4928         struct btrfs_inode_item *ii;
4929         struct btrfs_key key;
4930         struct btrfs_key location;
4931         struct extent_buffer *node;
4932         int slot;
4933         char namebuf[BTRFS_NAME_LEN] = {0};
4934         u32 total;
4935         u32 cur = 0;
4936         u32 len;
4937         u32 name_len;
4938         u32 data_len;
4939         u8 filetype;
4940         u32 mode;
4941         u64 index;
4942         int ret;
4943         int err = 0;
4944         int tmp_err;
4945
4946         node = path->nodes[0];
4947         slot = path->slots[0];
4948         /*
4949          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4950          * ignore index check.
4951          */
4952         if (di_key->type == BTRFS_DIR_INDEX_KEY)
4953                 index = di_key->offset;
4954         else
4955                 index = (u64)-1;
4956
4957         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4958         total = btrfs_item_size_nr(node, slot);
4959         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4960
4961         while (cur < total) {
4962                 data_len = btrfs_dir_data_len(node, di);
4963                 tmp_err = 0;
4964                 if (data_len)
4965                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4966                               root->objectid,
4967               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
4968                               di_key->objectid, di_key->offset, data_len);
4969
4970                 name_len = btrfs_dir_name_len(node, di);
4971                 if (name_len <= BTRFS_NAME_LEN) {
4972                         len = name_len;
4973                 } else {
4974                         len = BTRFS_NAME_LEN;
4975                         warning("root %llu %s[%llu %llu] name too long",
4976                                 root->objectid,
4977                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
4978                                 di_key->objectid, di_key->offset);
4979                 }
4980                 (*size) += name_len;
4981                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4982                                    len);
4983                 filetype = btrfs_dir_type(node, di);
4984
4985                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
4986                     di_key->offset != btrfs_name_hash(namebuf, len)) {
4987                         err |= -EIO;
4988                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4989                         root->objectid, di_key->objectid, di_key->offset,
4990                         namebuf, len, filetype, di_key->offset,
4991                         btrfs_name_hash(namebuf, len));
4992                 }
4993
4994                 btrfs_dir_item_key_to_cpu(node, di, &location);
4995                 /* Ignore related ROOT_ITEM check */
4996                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4997                         goto next;
4998
4999                 btrfs_release_path(path);
5000                 /* Check relative INODE_ITEM(existence/filetype) */
5001                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5002                 if (ret) {
5003                         tmp_err |= INODE_ITEM_MISSING;
5004                         goto next;
5005                 }
5006
5007                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5008                                     struct btrfs_inode_item);
5009                 mode = btrfs_inode_mode(path->nodes[0], ii);
5010                 if (imode_to_type(mode) != filetype) {
5011                         tmp_err |= INODE_ITEM_MISMATCH;
5012                         goto next;
5013                 }
5014
5015                 /* Check relative INODE_REF/INODE_EXTREF */
5016                 key.objectid = location.objectid;
5017                 key.type = BTRFS_INODE_REF_KEY;
5018                 key.offset = di_key->objectid;
5019                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5020                                           &index, ext_ref);
5021
5022                 /* check relative INDEX/ITEM */
5023                 key.objectid = di_key->objectid;
5024                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5025                         key.type = BTRFS_DIR_INDEX_KEY;
5026                         key.offset = index;
5027                 } else {
5028                         key.type = BTRFS_DIR_ITEM_KEY;
5029                         key.offset = btrfs_name_hash(namebuf, name_len);
5030                 }
5031
5032                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5033                                          name_len, filetype);
5034                 /* find_dir_item may find index */
5035                 if (key.type == BTRFS_DIR_INDEX_KEY)
5036                         index = key.offset;
5037 next:
5038                 btrfs_release_path(path);
5039                 print_dir_item_err(root, di_key, location.objectid, index,
5040                                    namebuf, name_len, filetype, tmp_err);
5041                 err |= tmp_err;
5042                 len = sizeof(*di) + name_len + data_len;
5043                 di = (struct btrfs_dir_item *)((char *)di + len);
5044                 cur += len;
5045
5046                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5047                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5048                               root->objectid, di_key->objectid,
5049                               di_key->offset);
5050                         break;
5051                 }
5052         }
5053
5054         /* research path */
5055         btrfs_release_path(path);
5056         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5057         if (ret)
5058                 err |= ret > 0 ? -ENOENT : ret;
5059         return err;
5060 }
5061
5062 /*
5063  * Check file extent datasum/hole, update the size of the file extents,
5064  * check and update the last offset of the file extent.
5065  *
5066  * @root:       the root of fs/file tree.
5067  * @fkey:       the key of the file extent.
5068  * @nodatasum:  INODE_NODATASUM feature.
5069  * @size:       the sum of all EXTENT_DATA items size for this inode.
5070  * @end:        the offset of the last extent.
5071  *
5072  * Return 0 if no error occurred.
5073  */
5074 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5075                              struct extent_buffer *node, int slot,
5076                              unsigned int nodatasum, u64 *size, u64 *end)
5077 {
5078         struct btrfs_file_extent_item *fi;
5079         u64 disk_bytenr;
5080         u64 disk_num_bytes;
5081         u64 extent_num_bytes;
5082         u64 extent_offset;
5083         u64 csum_found;         /* In byte size, sectorsize aligned */
5084         u64 search_start;       /* Logical range start we search for csum */
5085         u64 search_len;         /* Logical range len we search for csum */
5086         unsigned int extent_type;
5087         unsigned int is_hole;
5088         int compressed = 0;
5089         int ret;
5090         int err = 0;
5091
5092         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5093
5094         /* Check inline extent */
5095         extent_type = btrfs_file_extent_type(node, fi);
5096         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5097                 struct btrfs_item *e = btrfs_item_nr(slot);
5098                 u32 item_inline_len;
5099
5100                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5101                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5102                 compressed = btrfs_file_extent_compression(node, fi);
5103                 if (extent_num_bytes == 0) {
5104                         error(
5105                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5106                                 root->objectid, fkey->objectid, fkey->offset);
5107                         err |= FILE_EXTENT_ERROR;
5108                 }
5109                 if (!compressed && extent_num_bytes != item_inline_len) {
5110                         error(
5111                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5112                                 root->objectid, fkey->objectid, fkey->offset,
5113                                 extent_num_bytes, item_inline_len);
5114                         err |= FILE_EXTENT_ERROR;
5115                 }
5116                 *end += extent_num_bytes;
5117                 *size += extent_num_bytes;
5118                 return err;
5119         }
5120
5121         /* Check extent type */
5122         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5123                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5124                 err |= FILE_EXTENT_ERROR;
5125                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5126                       root->objectid, fkey->objectid, fkey->offset);
5127                 return err;
5128         }
5129
5130         /* Check REG_EXTENT/PREALLOC_EXTENT */
5131         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5132         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5133         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5134         extent_offset = btrfs_file_extent_offset(node, fi);
5135         compressed = btrfs_file_extent_compression(node, fi);
5136         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5137
5138         /*
5139          * Check EXTENT_DATA csum
5140          *
5141          * For plain (uncompressed) extent, we should only check the range
5142          * we're referring to, as it's possible that part of prealloc extent
5143          * has been written, and has csum:
5144          *
5145          * |<--- Original large preallocated extent A ---->|
5146          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5147          *      No csum                         Has csum
5148          *
5149          * For compressed extent, we should check the whole range.
5150          */
5151         if (!compressed) {
5152                 search_start = disk_bytenr + extent_offset;
5153                 search_len = extent_num_bytes;
5154         } else {
5155                 search_start = disk_bytenr;
5156                 search_len = disk_num_bytes;
5157         }
5158         ret = count_csum_range(root, search_start, search_len, &csum_found);
5159         if (csum_found > 0 && nodatasum) {
5160                 err |= ODD_CSUM_ITEM;
5161                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5162                       root->objectid, fkey->objectid, fkey->offset);
5163         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5164                    !is_hole && (ret < 0 || csum_found < search_len)) {
5165                 err |= CSUM_ITEM_MISSING;
5166                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5167                       root->objectid, fkey->objectid, fkey->offset,
5168                       csum_found, search_len);
5169         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5170                 err |= ODD_CSUM_ITEM;
5171                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5172                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5173         }
5174
5175         /* Check EXTENT_DATA hole */
5176         if (!no_holes && *end != fkey->offset) {
5177                 err |= FILE_EXTENT_ERROR;
5178                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5179                       root->objectid, fkey->objectid, fkey->offset);
5180         }
5181
5182         *end += extent_num_bytes;
5183         if (!is_hole)
5184                 *size += extent_num_bytes;
5185
5186         return err;
5187 }
5188
5189 /*
5190  * Set inode item nbytes to @nbytes
5191  *
5192  * Returns  0     on success
5193  * Returns  != 0  on error
5194  */
5195 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5196                                       struct btrfs_path *path,
5197                                       u64 ino, u64 nbytes)
5198 {
5199         struct btrfs_trans_handle *trans;
5200         struct btrfs_inode_item *ii;
5201         struct btrfs_key key;
5202         struct btrfs_key research_key;
5203         int err = 0;
5204         int ret;
5205
5206         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5207
5208         key.objectid = ino;
5209         key.type = BTRFS_INODE_ITEM_KEY;
5210         key.offset = 0;
5211
5212         trans = btrfs_start_transaction(root, 1);
5213         if (IS_ERR(trans)) {
5214                 ret = PTR_ERR(trans);
5215                 err |= ret;
5216                 goto out;
5217         }
5218
5219         btrfs_release_path(path);
5220         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5221         if (ret > 0)
5222                 ret = -ENOENT;
5223         if (ret) {
5224                 err |= ret;
5225                 goto fail;
5226         }
5227
5228         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5229                             struct btrfs_inode_item);
5230         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5231         btrfs_mark_buffer_dirty(path->nodes[0]);
5232 fail:
5233         btrfs_commit_transaction(trans, root);
5234 out:
5235         if (ret)
5236                 error("failed to set nbytes in inode %llu root %llu",
5237                       ino, root->root_key.objectid);
5238         else
5239                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5240                        root->root_key.objectid, nbytes);
5241
5242         /* research path */
5243         btrfs_release_path(path);
5244         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5245         err |= ret;
5246
5247         return err;
5248 }
5249
5250 /*
5251  * Set directory inode isize to @isize.
5252  *
5253  * Returns 0     on success.
5254  * Returns != 0  on error.
5255  */
5256 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5257                                    struct btrfs_path *path,
5258                                    u64 ino, u64 isize)
5259 {
5260         struct btrfs_trans_handle *trans;
5261         struct btrfs_inode_item *ii;
5262         struct btrfs_key key;
5263         struct btrfs_key research_key;
5264         int ret;
5265         int err = 0;
5266
5267         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5268
5269         key.objectid = ino;
5270         key.type = BTRFS_INODE_ITEM_KEY;
5271         key.offset = 0;
5272
5273         trans = btrfs_start_transaction(root, 1);
5274         if (IS_ERR(trans)) {
5275                 ret = PTR_ERR(trans);
5276                 err |= ret;
5277                 goto out;
5278         }
5279
5280         btrfs_release_path(path);
5281         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5282         if (ret > 0)
5283                 ret = -ENOENT;
5284         if (ret) {
5285                 err |= ret;
5286                 goto fail;
5287         }
5288
5289         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5290                             struct btrfs_inode_item);
5291         btrfs_set_inode_size(path->nodes[0], ii, isize);
5292         btrfs_mark_buffer_dirty(path->nodes[0]);
5293 fail:
5294         btrfs_commit_transaction(trans, root);
5295 out:
5296         if (ret)
5297                 error("failed to set isize in inode %llu root %llu",
5298                       ino, root->root_key.objectid);
5299         else
5300                 printf("Set isize in inode %llu root %llu to %llu\n",
5301                        ino, root->root_key.objectid, isize);
5302
5303         btrfs_release_path(path);
5304         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5305         err |= ret;
5306
5307         return err;
5308 }
5309
5310 /*
5311  * Wrapper function for btrfs_add_orphan_item().
5312  *
5313  * Returns 0     on success.
5314  * Returns != 0  on error.
5315  */
5316 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5317                                            struct btrfs_path *path, u64 ino)
5318 {
5319         struct btrfs_trans_handle *trans;
5320         struct btrfs_key research_key;
5321         int ret;
5322         int err = 0;
5323
5324         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5325
5326         trans = btrfs_start_transaction(root, 1);
5327         if (IS_ERR(trans)) {
5328                 ret = PTR_ERR(trans);
5329                 err |= ret;
5330                 goto out;
5331         }
5332
5333         btrfs_release_path(path);
5334         ret = btrfs_add_orphan_item(trans, root, path, ino);
5335         err |= ret;
5336         btrfs_commit_transaction(trans, root);
5337 out:
5338         if (ret)
5339                 error("failed to add inode %llu as orphan item root %llu",
5340                       ino, root->root_key.objectid);
5341         else
5342                 printf("Added inode %llu as orphan item root %llu\n",
5343                        ino, root->root_key.objectid);
5344
5345         btrfs_release_path(path);
5346         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5347         err |= ret;
5348
5349         return err;
5350 }
5351
5352 /*
5353  * Check INODE_ITEM and related ITEMs (the same inode number)
5354  * 1. check link count
5355  * 2. check inode ref/extref
5356  * 3. check dir item/index
5357  *
5358  * @ext_ref:    the EXTENDED_IREF feature
5359  *
5360  * Return 0 if no error occurred.
5361  * Return >0 for error or hit the traversal is done(by error bitmap)
5362  */
5363 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5364                             unsigned int ext_ref)
5365 {
5366         struct extent_buffer *node;
5367         struct btrfs_inode_item *ii;
5368         struct btrfs_key key;
5369         u64 inode_id;
5370         u32 mode;
5371         u64 nlink;
5372         u64 nbytes;
5373         u64 isize;
5374         u64 size = 0;
5375         u64 refs = 0;
5376         u64 extent_end = 0;
5377         u64 extent_size = 0;
5378         unsigned int dir;
5379         unsigned int nodatasum;
5380         int slot;
5381         int ret;
5382         int err = 0;
5383         char namebuf[BTRFS_NAME_LEN] = {0};
5384         u32 name_len = 0;
5385
5386         node = path->nodes[0];
5387         slot = path->slots[0];
5388
5389         btrfs_item_key_to_cpu(node, &key, slot);
5390         inode_id = key.objectid;
5391
5392         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5393                 ret = btrfs_next_item(root, path);
5394                 if (ret > 0)
5395                         err |= LAST_ITEM;
5396                 return err;
5397         }
5398
5399         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5400         isize = btrfs_inode_size(node, ii);
5401         nbytes = btrfs_inode_nbytes(node, ii);
5402         mode = btrfs_inode_mode(node, ii);
5403         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5404         nlink = btrfs_inode_nlink(node, ii);
5405         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5406
5407         while (1) {
5408                 ret = btrfs_next_item(root, path);
5409                 if (ret < 0) {
5410                         /* out will fill 'err' rusing current statistics */
5411                         goto out;
5412                 } else if (ret > 0) {
5413                         err |= LAST_ITEM;
5414                         goto out;
5415                 }
5416
5417                 node = path->nodes[0];
5418                 slot = path->slots[0];
5419                 btrfs_item_key_to_cpu(node, &key, slot);
5420                 if (key.objectid != inode_id)
5421                         goto out;
5422
5423                 switch (key.type) {
5424                 case BTRFS_INODE_REF_KEY:
5425                         ret = check_inode_ref(root, &key, path, namebuf,
5426                                               &name_len, &refs, mode);
5427                         err |= ret;
5428                         break;
5429                 case BTRFS_INODE_EXTREF_KEY:
5430                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5431                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5432                                         root->objectid, key.objectid,
5433                                         key.offset);
5434                         ret = check_inode_extref(root, &key, node, slot, &refs,
5435                                                  mode);
5436                         err |= ret;
5437                         break;
5438                 case BTRFS_DIR_ITEM_KEY:
5439                 case BTRFS_DIR_INDEX_KEY:
5440                         if (!dir) {
5441                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5442                                         root->objectid, inode_id,
5443                                         imode_to_type(mode), key.objectid,
5444                                         key.offset);
5445                         }
5446                         ret = check_dir_item(root, &key, path, &size, ext_ref);
5447                         err |= ret;
5448                         break;
5449                 case BTRFS_EXTENT_DATA_KEY:
5450                         if (dir) {
5451                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5452                                         root->objectid, inode_id, key.objectid,
5453                                         key.offset);
5454                         }
5455                         ret = check_file_extent(root, &key, node, slot,
5456                                                 nodatasum, &extent_size,
5457                                                 &extent_end);
5458                         err |= ret;
5459                         break;
5460                 case BTRFS_XATTR_ITEM_KEY:
5461                         break;
5462                 default:
5463                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5464                               key.objectid, key.type, key.offset);
5465                 }
5466         }
5467
5468 out:
5469         /* verify INODE_ITEM nlink/isize/nbytes */
5470         if (dir) {
5471                 if (nlink != 1) {
5472                         err |= LINK_COUNT_ERROR;
5473                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5474                               root->objectid, inode_id, nlink);
5475                 }
5476
5477                 /*
5478                  * Just a warning, as dir inode nbytes is just an
5479                  * instructive value.
5480                  */
5481                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5482                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5483                                 root->objectid, inode_id,
5484                                 root->fs_info->nodesize);
5485                 }
5486
5487                 if (isize != size) {
5488                         if (repair)
5489                                 ret = repair_dir_isize_lowmem(root, path,
5490                                                               inode_id, size);
5491                         if (!repair || ret) {
5492                                 err |= ISIZE_ERROR;
5493                                 error(
5494                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5495                                       root->objectid, inode_id, isize, size);
5496                         }
5497                 }
5498         } else {
5499                 if (nlink != refs) {
5500                         err |= LINK_COUNT_ERROR;
5501                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5502                               root->objectid, inode_id, nlink, refs);
5503                 } else if (!nlink) {
5504                         if (repair)
5505                                 ret = repair_inode_orphan_item_lowmem(root,
5506                                                               path, inode_id);
5507                         if (!repair || ret) {
5508                                 err |= ORPHAN_ITEM;
5509                                 error("root %llu INODE[%llu] is orphan item",
5510                                       root->objectid, inode_id);
5511                         }
5512                 }
5513
5514                 if (!nbytes && !no_holes && extent_end < isize) {
5515                         err |= NBYTES_ERROR;
5516                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5517                               root->objectid, inode_id, isize);
5518                 }
5519
5520                 if (nbytes != extent_size) {
5521                         if (repair)
5522                                 ret = repair_inode_nbytes_lowmem(root, path,
5523                                                          inode_id, extent_size);
5524                         if (!repair || ret) {
5525                                 err |= NBYTES_ERROR;
5526                                 error(
5527         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5528                                       root->objectid, inode_id, nbytes,
5529                                       extent_size);
5530                         }
5531                 }
5532         }
5533
5534         return err;
5535 }
5536
5537 /*
5538  * check first root dir's inode_item and inode_ref
5539  *
5540  * returns 0 means no error
5541  * returns >0 means error
5542  * returns <0 means fatal error
5543  */
5544 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5545 {
5546         struct btrfs_path path;
5547         struct btrfs_key key;
5548         struct btrfs_inode_item *ii;
5549         u64 index;
5550         u32 mode;
5551         int err = 0;
5552         int ret;
5553
5554         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5555         key.type = BTRFS_INODE_ITEM_KEY;
5556         key.offset = 0;
5557
5558         /* For root being dropped, we don't need to check first inode */
5559         if (btrfs_root_refs(&root->root_item) == 0 &&
5560             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5561             BTRFS_FIRST_FREE_OBJECTID)
5562                 return 0;
5563
5564         btrfs_init_path(&path);
5565         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5566         if (ret < 0)
5567                 goto out;
5568         if (ret > 0) {
5569                 ret = 0;
5570                 err |= INODE_ITEM_MISSING;
5571         } else {
5572                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5573                                     struct btrfs_inode_item);
5574                 mode = btrfs_inode_mode(path.nodes[0], ii);
5575                 if (imode_to_type(mode) != BTRFS_FT_DIR)
5576                         err |= INODE_ITEM_MISMATCH;
5577         }
5578
5579         /* lookup first inode ref */
5580         key.offset = BTRFS_FIRST_FREE_OBJECTID;
5581         key.type = BTRFS_INODE_REF_KEY;
5582         /* special index value */
5583         index = 0;
5584
5585         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5586         if (ret < 0)
5587                 goto out;
5588         err |= ret;
5589
5590 out:
5591         btrfs_release_path(&path);
5592         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5593                 error("root dir INODE_ITEM is %s",
5594                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5595         if (err & INODE_REF_MISSING)
5596                 error("root dir INODE_REF is missing");
5597
5598         return ret < 0 ? ret : err;
5599 }
5600
5601 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5602                                                 u64 parent, u64 root)
5603 {
5604         struct rb_node *node;
5605         struct tree_backref *back = NULL;
5606         struct tree_backref match = {
5607                 .node = {
5608                         .is_data = 0,
5609                 },
5610         };
5611
5612         if (parent) {
5613                 match.parent = parent;
5614                 match.node.full_backref = 1;
5615         } else {
5616                 match.root = root;
5617         }
5618
5619         node = rb_search(&rec->backref_tree, &match.node.node,
5620                          (rb_compare_keys)compare_extent_backref, NULL);
5621         if (node)
5622                 back = to_tree_backref(rb_node_to_extent_backref(node));
5623
5624         return back;
5625 }
5626
5627 static struct data_backref *find_data_backref(struct extent_record *rec,
5628                                                 u64 parent, u64 root,
5629                                                 u64 owner, u64 offset,
5630                                                 int found_ref,
5631                                                 u64 disk_bytenr, u64 bytes)
5632 {
5633         struct rb_node *node;
5634         struct data_backref *back = NULL;
5635         struct data_backref match = {
5636                 .node = {
5637                         .is_data = 1,
5638                 },
5639                 .owner = owner,
5640                 .offset = offset,
5641                 .bytes = bytes,
5642                 .found_ref = found_ref,
5643                 .disk_bytenr = disk_bytenr,
5644         };
5645
5646         if (parent) {
5647                 match.parent = parent;
5648                 match.node.full_backref = 1;
5649         } else {
5650                 match.root = root;
5651         }
5652
5653         node = rb_search(&rec->backref_tree, &match.node.node,
5654                          (rb_compare_keys)compare_extent_backref, NULL);
5655         if (node)
5656                 back = to_data_backref(rb_node_to_extent_backref(node));
5657
5658         return back;
5659 }
5660 /*
5661  * Iterate all item on the tree and call check_inode_item() to check.
5662  *
5663  * @root:       the root of the tree to be checked.
5664  * @ext_ref:    the EXTENDED_IREF feature
5665  *
5666  * Return 0 if no error found.
5667  * Return <0 for error.
5668  */
5669 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5670 {
5671         struct btrfs_path path;
5672         struct node_refs nrefs;
5673         struct btrfs_root_item *root_item = &root->root_item;
5674         int ret;
5675         int level;
5676         int err = 0;
5677
5678         /*
5679          * We need to manually check the first inode item(256)
5680          * As the following traversal function will only start from
5681          * the first inode item in the leaf, if inode item(256) is missing
5682          * we will just skip it forever.
5683          */
5684         ret = check_fs_first_inode(root, ext_ref);
5685         if (ret < 0)
5686                 return ret;
5687         err |= !!ret;
5688
5689         memset(&nrefs, 0, sizeof(nrefs));
5690         level = btrfs_header_level(root->node);
5691         btrfs_init_path(&path);
5692
5693         if (btrfs_root_refs(root_item) > 0 ||
5694             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5695                 path.nodes[level] = root->node;
5696                 path.slots[level] = 0;
5697                 extent_buffer_get(root->node);
5698         } else {
5699                 struct btrfs_key key;
5700
5701                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5702                 level = root_item->drop_level;
5703                 path.lowest_level = level;
5704                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5705                 if (ret < 0)
5706                         goto out;
5707                 ret = 0;
5708         }
5709
5710         while (1) {
5711                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5712                 err |= !!ret;
5713
5714                 /* if ret is negative, walk shall stop */
5715                 if (ret < 0) {
5716                         ret = err;
5717                         break;
5718                 }
5719
5720                 ret = walk_up_tree_v2(root, &path, &level);
5721                 if (ret != 0) {
5722                         /* Normal exit, reset ret to err */
5723                         ret = err;
5724                         break;
5725                 }
5726         }
5727
5728 out:
5729         btrfs_release_path(&path);
5730         return ret;
5731 }
5732
5733 /*
5734  * Find the relative ref for root_ref and root_backref.
5735  *
5736  * @root:       the root of the root tree.
5737  * @ref_key:    the key of the root ref.
5738  *
5739  * Return 0 if no error occurred.
5740  */
5741 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5742                           struct extent_buffer *node, int slot)
5743 {
5744         struct btrfs_path path;
5745         struct btrfs_key key;
5746         struct btrfs_root_ref *ref;
5747         struct btrfs_root_ref *backref;
5748         char ref_name[BTRFS_NAME_LEN] = {0};
5749         char backref_name[BTRFS_NAME_LEN] = {0};
5750         u64 ref_dirid;
5751         u64 ref_seq;
5752         u32 ref_namelen;
5753         u64 backref_dirid;
5754         u64 backref_seq;
5755         u32 backref_namelen;
5756         u32 len;
5757         int ret;
5758         int err = 0;
5759
5760         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5761         ref_dirid = btrfs_root_ref_dirid(node, ref);
5762         ref_seq = btrfs_root_ref_sequence(node, ref);
5763         ref_namelen = btrfs_root_ref_name_len(node, ref);
5764
5765         if (ref_namelen <= BTRFS_NAME_LEN) {
5766                 len = ref_namelen;
5767         } else {
5768                 len = BTRFS_NAME_LEN;
5769                 warning("%s[%llu %llu] ref_name too long",
5770                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5771                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5772                         ref_key->offset);
5773         }
5774         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5775
5776         /* Find relative root_ref */
5777         key.objectid = ref_key->offset;
5778         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5779         key.offset = ref_key->objectid;
5780
5781         btrfs_init_path(&path);
5782         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5783         if (ret) {
5784                 err |= ROOT_REF_MISSING;
5785                 error("%s[%llu %llu] couldn't find relative ref",
5786                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5787                       "ROOT_REF" : "ROOT_BACKREF",
5788                       ref_key->objectid, ref_key->offset);
5789                 goto out;
5790         }
5791
5792         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5793                                  struct btrfs_root_ref);
5794         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5795         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5796         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5797
5798         if (backref_namelen <= BTRFS_NAME_LEN) {
5799                 len = backref_namelen;
5800         } else {
5801                 len = BTRFS_NAME_LEN;
5802                 warning("%s[%llu %llu] ref_name too long",
5803                         key.type == BTRFS_ROOT_REF_KEY ?
5804                         "ROOT_REF" : "ROOT_BACKREF",
5805                         key.objectid, key.offset);
5806         }
5807         read_extent_buffer(path.nodes[0], backref_name,
5808                            (unsigned long)(backref + 1), len);
5809
5810         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5811             ref_namelen != backref_namelen ||
5812             strncmp(ref_name, backref_name, len)) {
5813                 err |= ROOT_REF_MISMATCH;
5814                 error("%s[%llu %llu] mismatch relative ref",
5815                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5816                       "ROOT_REF" : "ROOT_BACKREF",
5817                       ref_key->objectid, ref_key->offset);
5818         }
5819 out:
5820         btrfs_release_path(&path);
5821         return err;
5822 }
5823
5824 /*
5825  * Check all fs/file tree in low_memory mode.
5826  *
5827  * 1. for fs tree root item, call check_fs_root_v2()
5828  * 2. for fs tree root ref/backref, call check_root_ref()
5829  *
5830  * Return 0 if no error occurred.
5831  */
5832 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5833 {
5834         struct btrfs_root *tree_root = fs_info->tree_root;
5835         struct btrfs_root *cur_root = NULL;
5836         struct btrfs_path path;
5837         struct btrfs_key key;
5838         struct extent_buffer *node;
5839         unsigned int ext_ref;
5840         int slot;
5841         int ret;
5842         int err = 0;
5843
5844         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5845
5846         btrfs_init_path(&path);
5847         key.objectid = BTRFS_FS_TREE_OBJECTID;
5848         key.offset = 0;
5849         key.type = BTRFS_ROOT_ITEM_KEY;
5850
5851         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5852         if (ret < 0) {
5853                 err = ret;
5854                 goto out;
5855         } else if (ret > 0) {
5856                 err = -ENOENT;
5857                 goto out;
5858         }
5859
5860         while (1) {
5861                 node = path.nodes[0];
5862                 slot = path.slots[0];
5863                 btrfs_item_key_to_cpu(node, &key, slot);
5864                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5865                         goto out;
5866                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5867                     fs_root_objectid(key.objectid)) {
5868                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5869                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5870                                                                        &key);
5871                         } else {
5872                                 key.offset = (u64)-1;
5873                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5874                         }
5875
5876                         if (IS_ERR(cur_root)) {
5877                                 error("Fail to read fs/subvol tree: %lld",
5878                                       key.objectid);
5879                                 err = -EIO;
5880                                 goto next;
5881                         }
5882
5883                         ret = check_fs_root_v2(cur_root, ext_ref);
5884                         err |= ret;
5885
5886                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5887                                 btrfs_free_fs_root(cur_root);
5888                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5889                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5890                         ret = check_root_ref(tree_root, &key, node, slot);
5891                         err |= ret;
5892                 }
5893 next:
5894                 ret = btrfs_next_item(tree_root, &path);
5895                 if (ret > 0)
5896                         goto out;
5897                 if (ret < 0) {
5898                         err = ret;
5899                         goto out;
5900                 }
5901         }
5902
5903 out:
5904         btrfs_release_path(&path);
5905         return err;
5906 }
5907
5908 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5909                           struct cache_tree *root_cache)
5910 {
5911         int ret;
5912
5913         if (!ctx.progress_enabled)
5914                 fprintf(stderr, "checking fs roots\n");
5915         if (check_mode == CHECK_MODE_LOWMEM)
5916                 ret = check_fs_roots_v2(fs_info);
5917         else
5918                 ret = check_fs_roots(fs_info, root_cache);
5919
5920         return ret;
5921 }
5922
5923 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5924 {
5925         struct extent_backref *back, *tmp;
5926         struct tree_backref *tback;
5927         struct data_backref *dback;
5928         u64 found = 0;
5929         int err = 0;
5930
5931         rbtree_postorder_for_each_entry_safe(back, tmp,
5932                                              &rec->backref_tree, node) {
5933                 if (!back->found_extent_tree) {
5934                         err = 1;
5935                         if (!print_errs)
5936                                 goto out;
5937                         if (back->is_data) {
5938                                 dback = to_data_backref(back);
5939                                 fprintf(stderr, "Data backref %llu %s %llu"
5940                                         " owner %llu offset %llu num_refs %lu"
5941                                         " not found in extent tree\n",
5942                                         (unsigned long long)rec->start,
5943                                         back->full_backref ?
5944                                         "parent" : "root",
5945                                         back->full_backref ?
5946                                         (unsigned long long)dback->parent:
5947                                         (unsigned long long)dback->root,
5948                                         (unsigned long long)dback->owner,
5949                                         (unsigned long long)dback->offset,
5950                                         (unsigned long)dback->num_refs);
5951                         } else {
5952                                 tback = to_tree_backref(back);
5953                                 fprintf(stderr, "Tree backref %llu parent %llu"
5954                                         " root %llu not found in extent tree\n",
5955                                         (unsigned long long)rec->start,
5956                                         (unsigned long long)tback->parent,
5957                                         (unsigned long long)tback->root);
5958                         }
5959                 }
5960                 if (!back->is_data && !back->found_ref) {
5961                         err = 1;
5962                         if (!print_errs)
5963                                 goto out;
5964                         tback = to_tree_backref(back);
5965                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5966                                 (unsigned long long)rec->start,
5967                                 back->full_backref ? "parent" : "root",
5968                                 back->full_backref ?
5969                                 (unsigned long long)tback->parent :
5970                                 (unsigned long long)tback->root, back);
5971                 }
5972                 if (back->is_data) {
5973                         dback = to_data_backref(back);
5974                         if (dback->found_ref != dback->num_refs) {
5975                                 err = 1;
5976                                 if (!print_errs)
5977                                         goto out;
5978                                 fprintf(stderr, "Incorrect local backref count"
5979                                         " on %llu %s %llu owner %llu"
5980                                         " offset %llu found %u wanted %u back %p\n",
5981                                         (unsigned long long)rec->start,
5982                                         back->full_backref ?
5983                                         "parent" : "root",
5984                                         back->full_backref ?
5985                                         (unsigned long long)dback->parent:
5986                                         (unsigned long long)dback->root,
5987                                         (unsigned long long)dback->owner,
5988                                         (unsigned long long)dback->offset,
5989                                         dback->found_ref, dback->num_refs, back);
5990                         }
5991                         if (dback->disk_bytenr != rec->start) {
5992                                 err = 1;
5993                                 if (!print_errs)
5994                                         goto out;
5995                                 fprintf(stderr, "Backref disk bytenr does not"
5996                                         " match extent record, bytenr=%llu, "
5997                                         "ref bytenr=%llu\n",
5998                                         (unsigned long long)rec->start,
5999                                         (unsigned long long)dback->disk_bytenr);
6000                         }
6001
6002                         if (dback->bytes != rec->nr) {
6003                                 err = 1;
6004                                 if (!print_errs)
6005                                         goto out;
6006                                 fprintf(stderr, "Backref bytes do not match "
6007                                         "extent backref, bytenr=%llu, ref "
6008                                         "bytes=%llu, backref bytes=%llu\n",
6009                                         (unsigned long long)rec->start,
6010                                         (unsigned long long)rec->nr,
6011                                         (unsigned long long)dback->bytes);
6012                         }
6013                 }
6014                 if (!back->is_data) {
6015                         found += 1;
6016                 } else {
6017                         dback = to_data_backref(back);
6018                         found += dback->found_ref;
6019                 }
6020         }
6021         if (found != rec->refs) {
6022                 err = 1;
6023                 if (!print_errs)
6024                         goto out;
6025                 fprintf(stderr, "Incorrect global backref count "
6026                         "on %llu found %llu wanted %llu\n",
6027                         (unsigned long long)rec->start,
6028                         (unsigned long long)found,
6029                         (unsigned long long)rec->refs);
6030         }
6031 out:
6032         return err;
6033 }
6034
/*
 * Free the extent backref that embeds the given rb-tree node.
 * Passed to rb_free_nodes() as the per-node callback.
 */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
6041
6042 static void free_all_extent_backrefs(struct extent_record *rec)
6043 {
6044         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6045 }
6046
6047 static void free_extent_record_cache(struct cache_tree *extent_cache)
6048 {
6049         struct cache_extent *cache;
6050         struct extent_record *rec;
6051
6052         while (1) {
6053                 cache = first_cache_extent(extent_cache);
6054                 if (!cache)
6055                         break;
6056                 rec = container_of(cache, struct extent_record, cache);
6057                 remove_cache_extent(extent_cache, cache);
6058                 free_all_extent_backrefs(rec);
6059                 free(rec);
6060         }
6061 }
6062
6063 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6064                                  struct extent_record *rec)
6065 {
6066         if (rec->content_checked && rec->owner_ref_checked &&
6067             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6068             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6069             !rec->bad_full_backref && !rec->crossing_stripes &&
6070             !rec->wrong_chunk_type) {
6071                 remove_cache_extent(extent_cache, &rec->cache);
6072                 free_all_extent_backrefs(rec);
6073                 list_del_init(&rec->list);
6074                 free(rec);
6075         }
6076         return 0;
6077 }
6078
6079 static int check_owner_ref(struct btrfs_root *root,
6080                             struct extent_record *rec,
6081                             struct extent_buffer *buf)
6082 {
6083         struct extent_backref *node, *tmp;
6084         struct tree_backref *back;
6085         struct btrfs_root *ref_root;
6086         struct btrfs_key key;
6087         struct btrfs_path path;
6088         struct extent_buffer *parent;
6089         int level;
6090         int found = 0;
6091         int ret;
6092
6093         rbtree_postorder_for_each_entry_safe(node, tmp,
6094                                              &rec->backref_tree, node) {
6095                 if (node->is_data)
6096                         continue;
6097                 if (!node->found_ref)
6098                         continue;
6099                 if (node->full_backref)
6100                         continue;
6101                 back = to_tree_backref(node);
6102                 if (btrfs_header_owner(buf) == back->root)
6103                         return 0;
6104         }
6105         BUG_ON(rec->is_root);
6106
6107         /* try to find the block by search corresponding fs tree */
6108         key.objectid = btrfs_header_owner(buf);
6109         key.type = BTRFS_ROOT_ITEM_KEY;
6110         key.offset = (u64)-1;
6111
6112         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6113         if (IS_ERR(ref_root))
6114                 return 1;
6115
6116         level = btrfs_header_level(buf);
6117         if (level == 0)
6118                 btrfs_item_key_to_cpu(buf, &key, 0);
6119         else
6120                 btrfs_node_key_to_cpu(buf, &key, 0);
6121
6122         btrfs_init_path(&path);
6123         path.lowest_level = level + 1;
6124         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6125         if (ret < 0)
6126                 return 0;
6127
6128         parent = path.nodes[level + 1];
6129         if (parent && buf->start == btrfs_node_blockptr(parent,
6130                                                         path.slots[level + 1]))
6131                 found = 1;
6132
6133         btrfs_release_path(&path);
6134         return found ? 0 : 1;
6135 }
6136
6137 static int is_extent_tree_record(struct extent_record *rec)
6138 {
6139         struct extent_backref *node, *tmp;
6140         struct tree_backref *back;
6141         int is_extent = 0;
6142
6143         rbtree_postorder_for_each_entry_safe(node, tmp,
6144                                              &rec->backref_tree, node) {
6145                 if (node->is_data)
6146                         return 0;
6147                 back = to_tree_backref(node);
6148                 if (node->full_backref)
6149                         return 0;
6150                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6151                         is_extent = 1;
6152         }
6153         return is_extent;
6154 }
6155
6156
6157 static int record_bad_block_io(struct btrfs_fs_info *info,
6158                                struct cache_tree *extent_cache,
6159                                u64 start, u64 len)
6160 {
6161         struct extent_record *rec;
6162         struct cache_extent *cache;
6163         struct btrfs_key key;
6164
6165         cache = lookup_cache_extent(extent_cache, start, len);
6166         if (!cache)
6167                 return 0;
6168
6169         rec = container_of(cache, struct extent_record, cache);
6170         if (!is_extent_tree_record(rec))
6171                 return 0;
6172
6173         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6174         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6175 }
6176
6177 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6178                        struct extent_buffer *buf, int slot)
6179 {
6180         if (btrfs_header_level(buf)) {
6181                 struct btrfs_key_ptr ptr1, ptr2;
6182
6183                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6184                                    sizeof(struct btrfs_key_ptr));
6185                 read_extent_buffer(buf, &ptr2,
6186                                    btrfs_node_key_ptr_offset(slot + 1),
6187                                    sizeof(struct btrfs_key_ptr));
6188                 write_extent_buffer(buf, &ptr1,
6189                                     btrfs_node_key_ptr_offset(slot + 1),
6190                                     sizeof(struct btrfs_key_ptr));
6191                 write_extent_buffer(buf, &ptr2,
6192                                     btrfs_node_key_ptr_offset(slot),
6193                                     sizeof(struct btrfs_key_ptr));
6194                 if (slot == 0) {
6195                         struct btrfs_disk_key key;
6196                         btrfs_node_key(buf, &key, 0);
6197                         btrfs_fixup_low_keys(root, path, &key,
6198                                              btrfs_header_level(buf) + 1);
6199                 }
6200         } else {
6201                 struct btrfs_item *item1, *item2;
6202                 struct btrfs_key k1, k2;
6203                 char *item1_data, *item2_data;
6204                 u32 item1_offset, item2_offset, item1_size, item2_size;
6205
6206                 item1 = btrfs_item_nr(slot);
6207                 item2 = btrfs_item_nr(slot + 1);
6208                 btrfs_item_key_to_cpu(buf, &k1, slot);
6209                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6210                 item1_offset = btrfs_item_offset(buf, item1);
6211                 item2_offset = btrfs_item_offset(buf, item2);
6212                 item1_size = btrfs_item_size(buf, item1);
6213                 item2_size = btrfs_item_size(buf, item2);
6214
6215                 item1_data = malloc(item1_size);
6216                 if (!item1_data)
6217                         return -ENOMEM;
6218                 item2_data = malloc(item2_size);
6219                 if (!item2_data) {
6220                         free(item1_data);
6221                         return -ENOMEM;
6222                 }
6223
6224                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6225                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6226
6227                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6228                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6229                 free(item1_data);
6230                 free(item2_data);
6231
6232                 btrfs_set_item_offset(buf, item1, item2_offset);
6233                 btrfs_set_item_offset(buf, item2, item1_offset);
6234                 btrfs_set_item_size(buf, item1, item2_size);
6235                 btrfs_set_item_size(buf, item2, item1_size);
6236
6237                 path->slots[0] = slot;
6238                 btrfs_set_item_key_unsafe(root, path, &k2);
6239                 path->slots[0] = slot + 1;
6240                 btrfs_set_item_key_unsafe(root, path, &k1);
6241         }
6242         return 0;
6243 }
6244
6245 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6246 {
6247         struct extent_buffer *buf;
6248         struct btrfs_key k1, k2;
6249         int i;
6250         int level = path->lowest_level;
6251         int ret = -EIO;
6252
6253         buf = path->nodes[level];
6254         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6255                 if (level) {
6256                         btrfs_node_key_to_cpu(buf, &k1, i);
6257                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6258                 } else {
6259                         btrfs_item_key_to_cpu(buf, &k1, i);
6260                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6261                 }
6262                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6263                         continue;
6264                 ret = swap_values(root, path, buf, i);
6265                 if (ret)
6266                         break;
6267                 btrfs_mark_buffer_dirty(buf);
6268                 i = 0;
6269         }
6270         return ret;
6271 }
6272
6273 static int delete_bogus_item(struct btrfs_root *root,
6274                              struct btrfs_path *path,
6275                              struct extent_buffer *buf, int slot)
6276 {
6277         struct btrfs_key key;
6278         int nritems = btrfs_header_nritems(buf);
6279
6280         btrfs_item_key_to_cpu(buf, &key, slot);
6281
6282         /* These are all the keys we can deal with missing. */
6283         if (key.type != BTRFS_DIR_INDEX_KEY &&
6284             key.type != BTRFS_EXTENT_ITEM_KEY &&
6285             key.type != BTRFS_METADATA_ITEM_KEY &&
6286             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6287             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6288                 return -1;
6289
6290         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6291                (unsigned long long)key.objectid, key.type,
6292                (unsigned long long)key.offset, slot, buf->start);
6293         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6294                               btrfs_item_nr_offset(slot + 1),
6295                               sizeof(struct btrfs_item) *
6296                               (nritems - slot - 1));
6297         btrfs_set_header_nritems(buf, nritems - 1);
6298         if (slot == 0) {
6299                 struct btrfs_disk_key disk_key;
6300
6301                 btrfs_item_key(buf, &disk_key, 0);
6302                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6303         }
6304         btrfs_mark_buffer_dirty(buf);
6305         return 0;
6306 }
6307
6308 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6309 {
6310         struct extent_buffer *buf;
6311         int i;
6312         int ret = 0;
6313
6314         /* We should only get this for leaves */
6315         BUG_ON(path->lowest_level);
6316         buf = path->nodes[0];
6317 again:
6318         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6319                 unsigned int shift = 0, offset;
6320
6321                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6322                     BTRFS_LEAF_DATA_SIZE(root)) {
6323                         if (btrfs_item_end_nr(buf, i) >
6324                             BTRFS_LEAF_DATA_SIZE(root)) {
6325                                 ret = delete_bogus_item(root, path, buf, i);
6326                                 if (!ret)
6327                                         goto again;
6328                                 fprintf(stderr, "item is off the end of the "
6329                                         "leaf, can't fix\n");
6330                                 ret = -EIO;
6331                                 break;
6332                         }
6333                         shift = BTRFS_LEAF_DATA_SIZE(root) -
6334                                 btrfs_item_end_nr(buf, i);
6335                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6336                            btrfs_item_offset_nr(buf, i - 1)) {
6337                         if (btrfs_item_end_nr(buf, i) >
6338                             btrfs_item_offset_nr(buf, i - 1)) {
6339                                 ret = delete_bogus_item(root, path, buf, i);
6340                                 if (!ret)
6341                                         goto again;
6342                                 fprintf(stderr, "items overlap, can't fix\n");
6343                                 ret = -EIO;
6344                                 break;
6345                         }
6346                         shift = btrfs_item_offset_nr(buf, i - 1) -
6347                                 btrfs_item_end_nr(buf, i);
6348                 }
6349                 if (!shift)
6350                         continue;
6351
6352                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6353                        i, shift, (unsigned long long)buf->start);
6354                 offset = btrfs_item_offset_nr(buf, i);
6355                 memmove_extent_buffer(buf,
6356                                       btrfs_leaf_data(buf) + offset + shift,
6357                                       btrfs_leaf_data(buf) + offset,
6358                                       btrfs_item_size_nr(buf, i));
6359                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6360                                       offset + shift);
6361                 btrfs_mark_buffer_dirty(buf);
6362         }
6363
6364         /*
6365          * We may have moved things, in which case we want to exit so we don't
6366          * write those changes out.  Once we have proper abort functionality in
6367          * progs this can be changed to something nicer.
6368          */
6369         BUG_ON(ret);
6370         return ret;
6371 }
6372
6373 /*
6374  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6375  * then just return -EIO.
6376  */
6377 static int try_to_fix_bad_block(struct btrfs_root *root,
6378                                 struct extent_buffer *buf,
6379                                 enum btrfs_tree_block_status status)
6380 {
6381         struct btrfs_trans_handle *trans;
6382         struct ulist *roots;
6383         struct ulist_node *node;
6384         struct btrfs_root *search_root;
6385         struct btrfs_path path;
6386         struct ulist_iterator iter;
6387         struct btrfs_key root_key, key;
6388         int ret;
6389
6390         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6391             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6392                 return -EIO;
6393
6394         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6395         if (ret)
6396                 return -EIO;
6397
6398         btrfs_init_path(&path);
6399         ULIST_ITER_INIT(&iter);
6400         while ((node = ulist_next(roots, &iter))) {
6401                 root_key.objectid = node->val;
6402                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6403                 root_key.offset = (u64)-1;
6404
6405                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6406                 if (IS_ERR(root)) {
6407                         ret = -EIO;
6408                         break;
6409                 }
6410
6411
6412                 trans = btrfs_start_transaction(search_root, 0);
6413                 if (IS_ERR(trans)) {
6414                         ret = PTR_ERR(trans);
6415                         break;
6416                 }
6417
6418                 path.lowest_level = btrfs_header_level(buf);
6419                 path.skip_check_block = 1;
6420                 if (path.lowest_level)
6421                         btrfs_node_key_to_cpu(buf, &key, 0);
6422                 else
6423                         btrfs_item_key_to_cpu(buf, &key, 0);
6424                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6425                 if (ret) {
6426                         ret = -EIO;
6427                         btrfs_commit_transaction(trans, search_root);
6428                         break;
6429                 }
6430                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6431                         ret = fix_key_order(search_root, &path);
6432                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6433                         ret = fix_item_offset(search_root, &path);
6434                 if (ret) {
6435                         btrfs_commit_transaction(trans, search_root);
6436                         break;
6437                 }
6438                 btrfs_release_path(&path);
6439                 btrfs_commit_transaction(trans, search_root);
6440         }
6441         ulist_free(roots);
6442         btrfs_release_path(&path);
6443         return ret;
6444 }
6445
6446 static int check_block(struct btrfs_root *root,
6447                        struct cache_tree *extent_cache,
6448                        struct extent_buffer *buf, u64 flags)
6449 {
6450         struct extent_record *rec;
6451         struct cache_extent *cache;
6452         struct btrfs_key key;
6453         enum btrfs_tree_block_status status;
6454         int ret = 0;
6455         int level;
6456
6457         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6458         if (!cache)
6459                 return 1;
6460         rec = container_of(cache, struct extent_record, cache);
6461         rec->generation = btrfs_header_generation(buf);
6462
6463         level = btrfs_header_level(buf);
6464         if (btrfs_header_nritems(buf) > 0) {
6465
6466                 if (level == 0)
6467                         btrfs_item_key_to_cpu(buf, &key, 0);
6468                 else
6469                         btrfs_node_key_to_cpu(buf, &key, 0);
6470
6471                 rec->info_objectid = key.objectid;
6472         }
6473         rec->info_level = level;
6474
6475         if (btrfs_is_leaf(buf))
6476                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6477         else
6478                 status = btrfs_check_node(root, &rec->parent_key, buf);
6479
6480         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6481                 if (repair)
6482                         status = try_to_fix_bad_block(root, buf, status);
6483                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6484                         ret = -EIO;
6485                         fprintf(stderr, "bad block %llu\n",
6486                                 (unsigned long long)buf->start);
6487                 } else {
6488                         /*
6489                          * Signal to callers we need to start the scan over
6490                          * again since we'll have cowed blocks.
6491                          */
6492                         ret = -EAGAIN;
6493                 }
6494         } else {
6495                 rec->content_checked = 1;
6496                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6497                         rec->owner_ref_checked = 1;
6498                 else {
6499                         ret = check_owner_ref(root, rec, buf);
6500                         if (!ret)
6501                                 rec->owner_ref_checked = 1;
6502                 }
6503         }
6504         if (!ret)
6505                 maybe_free_extent_rec(extent_cache, rec);
6506         return ret;
6507 }
6508
/*
 * Compiled out.  This variant walks the rec->backrefs list looking for a
 * tree backref: with a non-zero @parent it matches full backrefs by parent,
 * otherwise it matches non-full backrefs by @root.  Returns NULL when nothing
 * matches.
 *
 * NOTE(review): callers in this file still use find_tree_backref(), so a live
 * definition presumably exists elsewhere -- confirm before deleting this.
 */
#if 0
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct tree_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                cur = cur->next;
                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root)
                                return back;
                }
        }
        return NULL;
}
#endif
6538
6539 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6540                                                 u64 parent, u64 root)
6541 {
6542         struct tree_backref *ref = malloc(sizeof(*ref));
6543
6544         if (!ref)
6545                 return NULL;
6546         memset(&ref->node, 0, sizeof(ref->node));
6547         if (parent > 0) {
6548                 ref->parent = parent;
6549                 ref->node.full_backref = 1;
6550         } else {
6551                 ref->root = root;
6552                 ref->node.full_backref = 0;
6553         }
6554
6555         return ref;
6556 }
6557
/*
 * Compiled out.  This variant walks the rec->backrefs list looking for a
 * data backref: with a non-zero @parent it matches full backrefs by parent,
 * otherwise it matches non-full backrefs by (@root, @owner, @offset).  When
 * both @found_ref and the candidate's found_ref are set, a candidate whose
 * bytes or disk_bytenr differ from @bytes / @disk_bytenr is skipped.  Returns
 * NULL when nothing matches.
 *
 * NOTE(review): callers in this file still use find_data_backref(), so a live
 * definition presumably exists elsewhere -- confirm before deleting this.
 */
#if 0
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct data_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                cur = cur->next;
                if (!node->is_data)
                        continue;
                back = to_data_backref(node);
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root && back->owner == owner &&
                            back->offset == offset) {
                                if (found_ref && node->found_ref &&
                                    (back->bytes != bytes ||
                                    back->disk_bytenr != disk_bytenr))
                                        continue;
                                return back;
                        }
                }
        }
        return NULL;
}
#endif
6596
6597 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6598                                                 u64 parent, u64 root,
6599                                                 u64 owner, u64 offset,
6600                                                 u64 max_size)
6601 {
6602         struct data_backref *ref = malloc(sizeof(*ref));
6603
6604         if (!ref)
6605                 return NULL;
6606         memset(&ref->node, 0, sizeof(ref->node));
6607         ref->node.is_data = 1;
6608
6609         if (parent > 0) {
6610                 ref->parent = parent;
6611                 ref->owner = 0;
6612                 ref->offset = 0;
6613                 ref->node.full_backref = 1;
6614         } else {
6615                 ref->root = root;
6616                 ref->owner = owner;
6617                 ref->offset = offset;
6618                 ref->node.full_backref = 0;
6619         }
6620         ref->bytes = max_size;
6621         ref->found_ref = 0;
6622         ref->num_refs = 0;
6623         if (max_size > rec->max_size)
6624                 rec->max_size = max_size;
6625         return ref;
6626 }
6627
6628 /* Check if the type of extent matches with its chunk */
6629 static void check_extent_type(struct extent_record *rec)
6630 {
6631         struct btrfs_block_group_cache *bg_cache;
6632
6633         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6634         if (!bg_cache)
6635                 return;
6636
6637         /* data extent, check chunk directly*/
6638         if (!rec->metadata) {
6639                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6640                         rec->wrong_chunk_type = 1;
6641                 return;
6642         }
6643
6644         /* metadata extent, check the obvious case first */
6645         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6646                                  BTRFS_BLOCK_GROUP_METADATA))) {
6647                 rec->wrong_chunk_type = 1;
6648                 return;
6649         }
6650
6651         /*
6652          * Check SYSTEM extent, as it's also marked as metadata, we can only
6653          * make sure it's a SYSTEM extent by its backref
6654          */
6655         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6656                 struct extent_backref *node;
6657                 struct tree_backref *tback;
6658                 u64 bg_type;
6659
6660                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6661                 if (node->is_data) {
6662                         /* tree block shouldn't have data backref */
6663                         rec->wrong_chunk_type = 1;
6664                         return;
6665                 }
6666                 tback = container_of(node, struct tree_backref, node);
6667
6668                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6669                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6670                 else
6671                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6672                 if (!(bg_cache->flags & bg_type))
6673                         rec->wrong_chunk_type = 1;
6674         }
6675 }
6676
6677 /*
6678  * Allocate a new extent record, fill default values from @tmpl and insert int
6679  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6680  * the cache, otherwise it fails.
6681  */
6682 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6683                 struct extent_record *tmpl)
6684 {
6685         struct extent_record *rec;
6686         int ret = 0;
6687
6688         BUG_ON(tmpl->max_size == 0);
6689         rec = malloc(sizeof(*rec));
6690         if (!rec)
6691                 return -ENOMEM;
6692         rec->start = tmpl->start;
6693         rec->max_size = tmpl->max_size;
6694         rec->nr = max(tmpl->nr, tmpl->max_size);
6695         rec->found_rec = tmpl->found_rec;
6696         rec->content_checked = tmpl->content_checked;
6697         rec->owner_ref_checked = tmpl->owner_ref_checked;
6698         rec->num_duplicates = 0;
6699         rec->metadata = tmpl->metadata;
6700         rec->flag_block_full_backref = FLAG_UNSET;
6701         rec->bad_full_backref = 0;
6702         rec->crossing_stripes = 0;
6703         rec->wrong_chunk_type = 0;
6704         rec->is_root = tmpl->is_root;
6705         rec->refs = tmpl->refs;
6706         rec->extent_item_refs = tmpl->extent_item_refs;
6707         rec->parent_generation = tmpl->parent_generation;
6708         INIT_LIST_HEAD(&rec->backrefs);
6709         INIT_LIST_HEAD(&rec->dups);
6710         INIT_LIST_HEAD(&rec->list);
6711         rec->backref_tree = RB_ROOT;
6712         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6713         rec->cache.start = tmpl->start;
6714         rec->cache.size = tmpl->nr;
6715         ret = insert_cache_extent(extent_cache, &rec->cache);
6716         if (ret) {
6717                 free(rec);
6718                 return ret;
6719         }
6720         bytes_used += rec->nr;
6721
6722         if (tmpl->metadata)
6723                 rec->crossing_stripes = check_crossing_stripes(global_info,
6724                                 rec->start, global_info->nodesize);
6725         check_extent_type(rec);
6726         return ret;
6727 }
6728
6729 /*
6730  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6731  * some are hints:
6732  * - refs              - if found, increase refs
6733  * - is_root           - if found, set
6734  * - content_checked   - if found, set
6735  * - owner_ref_checked - if found, set
6736  *
6737  * If not found, create a new one, initialize and insert.
6738  */
6739 static int add_extent_rec(struct cache_tree *extent_cache,
6740                 struct extent_record *tmpl)
6741 {
6742         struct extent_record *rec;
6743         struct cache_extent *cache;
6744         int ret = 0;
6745         int dup = 0;
6746
6747         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6748         if (cache) {
6749                 rec = container_of(cache, struct extent_record, cache);
6750                 if (tmpl->refs)
6751                         rec->refs++;
6752                 if (rec->nr == 1)
6753                         rec->nr = max(tmpl->nr, tmpl->max_size);
6754
6755                 /*
6756                  * We need to make sure to reset nr to whatever the extent
6757                  * record says was the real size, this way we can compare it to
6758                  * the backrefs.
6759                  */
6760                 if (tmpl->found_rec) {
6761                         if (tmpl->start != rec->start || rec->found_rec) {
6762                                 struct extent_record *tmp;
6763
6764                                 dup = 1;
6765                                 if (list_empty(&rec->list))
6766                                         list_add_tail(&rec->list,
6767                                                       &duplicate_extents);
6768
6769                                 /*
6770                                  * We have to do this song and dance in case we
6771                                  * find an extent record that falls inside of
6772                                  * our current extent record but does not have
6773                                  * the same objectid.
6774                                  */
6775                                 tmp = malloc(sizeof(*tmp));
6776                                 if (!tmp)
6777                                         return -ENOMEM;
6778                                 tmp->start = tmpl->start;
6779                                 tmp->max_size = tmpl->max_size;
6780                                 tmp->nr = tmpl->nr;
6781                                 tmp->found_rec = 1;
6782                                 tmp->metadata = tmpl->metadata;
6783                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6784                                 INIT_LIST_HEAD(&tmp->list);
6785                                 list_add_tail(&tmp->list, &rec->dups);
6786                                 rec->num_duplicates++;
6787                         } else {
6788                                 rec->nr = tmpl->nr;
6789                                 rec->found_rec = 1;
6790                         }
6791                 }
6792
6793                 if (tmpl->extent_item_refs && !dup) {
6794                         if (rec->extent_item_refs) {
6795                                 fprintf(stderr, "block %llu rec "
6796                                         "extent_item_refs %llu, passed %llu\n",
6797                                         (unsigned long long)tmpl->start,
6798                                         (unsigned long long)
6799                                                         rec->extent_item_refs,
6800                                         (unsigned long long)tmpl->extent_item_refs);
6801                         }
6802                         rec->extent_item_refs = tmpl->extent_item_refs;
6803                 }
6804                 if (tmpl->is_root)
6805                         rec->is_root = 1;
6806                 if (tmpl->content_checked)
6807                         rec->content_checked = 1;
6808                 if (tmpl->owner_ref_checked)
6809                         rec->owner_ref_checked = 1;
6810                 memcpy(&rec->parent_key, &tmpl->parent_key,
6811                                 sizeof(tmpl->parent_key));
6812                 if (tmpl->parent_generation)
6813                         rec->parent_generation = tmpl->parent_generation;
6814                 if (rec->max_size < tmpl->max_size)
6815                         rec->max_size = tmpl->max_size;
6816
6817                 /*
6818                  * A metadata extent can't cross stripe_len boundary, otherwise
6819                  * kernel scrub won't be able to handle it.
6820                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6821                  * it.
6822                  */
6823                 if (tmpl->metadata)
6824                         rec->crossing_stripes = check_crossing_stripes(
6825                                         global_info, rec->start,
6826                                         global_info->nodesize);
6827                 check_extent_type(rec);
6828                 maybe_free_extent_rec(extent_cache, rec);
6829                 return ret;
6830         }
6831
6832         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6833
6834         return ret;
6835 }
6836
6837 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6838                             u64 parent, u64 root, int found_ref)
6839 {
6840         struct extent_record *rec;
6841         struct tree_backref *back;
6842         struct cache_extent *cache;
6843         int ret;
6844         bool insert = false;
6845
6846         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6847         if (!cache) {
6848                 struct extent_record tmpl;
6849
6850                 memset(&tmpl, 0, sizeof(tmpl));
6851                 tmpl.start = bytenr;
6852                 tmpl.nr = 1;
6853                 tmpl.metadata = 1;
6854                 tmpl.max_size = 1;
6855
6856                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6857                 if (ret)
6858                         return ret;
6859
6860                 /* really a bug in cache_extent implement now */
6861                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6862                 if (!cache)
6863                         return -ENOENT;
6864         }
6865
6866         rec = container_of(cache, struct extent_record, cache);
6867         if (rec->start != bytenr) {
6868                 /*
6869                  * Several cause, from unaligned bytenr to over lapping extents
6870                  */
6871                 return -EEXIST;
6872         }
6873
6874         back = find_tree_backref(rec, parent, root);
6875         if (!back) {
6876                 back = alloc_tree_backref(rec, parent, root);
6877                 if (!back)
6878                         return -ENOMEM;
6879                 insert = true;
6880         }
6881
6882         if (found_ref) {
6883                 if (back->node.found_ref) {
6884                         fprintf(stderr, "Extent back ref already exists "
6885                                 "for %llu parent %llu root %llu \n",
6886                                 (unsigned long long)bytenr,
6887                                 (unsigned long long)parent,
6888                                 (unsigned long long)root);
6889                 }
6890                 back->node.found_ref = 1;
6891         } else {
6892                 if (back->node.found_extent_tree) {
6893                         fprintf(stderr, "Extent back ref already exists "
6894                                 "for %llu parent %llu root %llu \n",
6895                                 (unsigned long long)bytenr,
6896                                 (unsigned long long)parent,
6897                                 (unsigned long long)root);
6898                 }
6899                 back->node.found_extent_tree = 1;
6900         }
6901         if (insert)
6902                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6903                         compare_extent_backref));
6904         check_extent_type(rec);
6905         maybe_free_extent_rec(extent_cache, rec);
6906         return 0;
6907 }
6908
6909 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6910                             u64 parent, u64 root, u64 owner, u64 offset,
6911                             u32 num_refs, int found_ref, u64 max_size)
6912 {
6913         struct extent_record *rec;
6914         struct data_backref *back;
6915         struct cache_extent *cache;
6916         int ret;
6917         bool insert = false;
6918
6919         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6920         if (!cache) {
6921                 struct extent_record tmpl;
6922
6923                 memset(&tmpl, 0, sizeof(tmpl));
6924                 tmpl.start = bytenr;
6925                 tmpl.nr = 1;
6926                 tmpl.max_size = max_size;
6927
6928                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6929                 if (ret)
6930                         return ret;
6931
6932                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6933                 if (!cache)
6934                         abort();
6935         }
6936
6937         rec = container_of(cache, struct extent_record, cache);
6938         if (rec->max_size < max_size)
6939                 rec->max_size = max_size;
6940
6941         /*
6942          * If found_ref is set then max_size is the real size and must match the
6943          * existing refs.  So if we have already found a ref then we need to
6944          * make sure that this ref matches the existing one, otherwise we need
6945          * to add a new backref so we can notice that the backrefs don't match
6946          * and we need to figure out who is telling the truth.  This is to
6947          * account for that awful fsync bug I introduced where we'd end up with
6948          * a btrfs_file_extent_item that would have its length include multiple
6949          * prealloc extents or point inside of a prealloc extent.
6950          */
6951         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6952                                  bytenr, max_size);
6953         if (!back) {
6954                 back = alloc_data_backref(rec, parent, root, owner, offset,
6955                                           max_size);
6956                 BUG_ON(!back);
6957                 insert = true;
6958         }
6959
6960         if (found_ref) {
6961                 BUG_ON(num_refs != 1);
6962                 if (back->node.found_ref)
6963                         BUG_ON(back->bytes != max_size);
6964                 back->node.found_ref = 1;
6965                 back->found_ref += 1;
6966                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6967                         back->bytes = max_size;
6968                         back->disk_bytenr = bytenr;
6969
6970                         /* Need to reinsert if not already in the tree */
6971                         if (!insert) {
6972                                 rb_erase(&back->node.node, &rec->backref_tree);
6973                                 insert = true;
6974                         }
6975                 }
6976                 rec->refs += 1;
6977                 rec->content_checked = 1;
6978                 rec->owner_ref_checked = 1;
6979         } else {
6980                 if (back->node.found_extent_tree) {
6981                         fprintf(stderr, "Extent back ref already exists "
6982                                 "for %llu parent %llu root %llu "
6983                                 "owner %llu offset %llu num_refs %lu\n",
6984                                 (unsigned long long)bytenr,
6985                                 (unsigned long long)parent,
6986                                 (unsigned long long)root,
6987                                 (unsigned long long)owner,
6988                                 (unsigned long long)offset,
6989                                 (unsigned long)num_refs);
6990                 }
6991                 back->num_refs = num_refs;
6992                 back->node.found_extent_tree = 1;
6993         }
6994         if (insert)
6995                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6996                         compare_extent_backref));
6997
6998         maybe_free_extent_rec(extent_cache, rec);
6999         return 0;
7000 }
7001
7002 static int add_pending(struct cache_tree *pending,
7003                        struct cache_tree *seen, u64 bytenr, u32 size)
7004 {
7005         int ret;
7006         ret = add_cache_extent(seen, bytenr, size);
7007         if (ret)
7008                 return ret;
7009         add_cache_extent(pending, bytenr, size);
7010         return 0;
7011 }
7012
7013 static int pick_next_pending(struct cache_tree *pending,
7014                         struct cache_tree *reada,
7015                         struct cache_tree *nodes,
7016                         u64 last, struct block_info *bits, int bits_nr,
7017                         int *reada_bits)
7018 {
7019         unsigned long node_start = last;
7020         struct cache_extent *cache;
7021         int ret;
7022
7023         cache = search_cache_extent(reada, 0);
7024         if (cache) {
7025                 bits[0].start = cache->start;
7026                 bits[0].size = cache->size;
7027                 *reada_bits = 1;
7028                 return 1;
7029         }
7030         *reada_bits = 0;
7031         if (node_start > 32768)
7032                 node_start -= 32768;
7033
7034         cache = search_cache_extent(nodes, node_start);
7035         if (!cache)
7036                 cache = search_cache_extent(nodes, 0);
7037
7038         if (!cache) {
7039                  cache = search_cache_extent(pending, 0);
7040                  if (!cache)
7041                          return 0;
7042                  ret = 0;
7043                  do {
7044                          bits[ret].start = cache->start;
7045                          bits[ret].size = cache->size;
7046                          cache = next_cache_extent(cache);
7047                          ret++;
7048                  } while (cache && ret < bits_nr);
7049                  return ret;
7050         }
7051
7052         ret = 0;
7053         do {
7054                 bits[ret].start = cache->start;
7055                 bits[ret].size = cache->size;
7056                 cache = next_cache_extent(cache);
7057                 ret++;
7058         } while (cache && ret < bits_nr);
7059
7060         if (bits_nr - ret > 8) {
7061                 u64 lookup = bits[0].start + bits[0].size;
7062                 struct cache_extent *next;
7063                 next = search_cache_extent(pending, lookup);
7064                 while(next) {
7065                         if (next->start - lookup > 32768)
7066                                 break;
7067                         bits[ret].start = next->start;
7068                         bits[ret].size = next->size;
7069                         lookup = next->start + next->size;
7070                         ret++;
7071                         if (ret == bits_nr)
7072                                 break;
7073                         next = next_cache_extent(next);
7074                         if (!next)
7075                                 break;
7076                 }
7077         }
7078         return ret;
7079 }
7080
7081 static void free_chunk_record(struct cache_extent *cache)
7082 {
7083         struct chunk_record *rec;
7084
7085         rec = container_of(cache, struct chunk_record, cache);
7086         list_del_init(&rec->list);
7087         list_del_init(&rec->dextents);
7088         free(rec);
7089 }
7090
7091 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7092 {
7093         cache_tree_free_extents(chunk_cache, free_chunk_record);
7094 }
7095
7096 static void free_device_record(struct rb_node *node)
7097 {
7098         struct device_record *rec;
7099
7100         rec = container_of(node, struct device_record, node);
7101         free(rec);
7102 }
7103
7104 FREE_RB_BASED_TREE(device_cache, free_device_record);
7105
7106 int insert_block_group_record(struct block_group_tree *tree,
7107                               struct block_group_record *bg_rec)
7108 {
7109         int ret;
7110
7111         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7112         if (ret)
7113                 return ret;
7114
7115         list_add_tail(&bg_rec->list, &tree->block_groups);
7116         return 0;
7117 }
7118
7119 static void free_block_group_record(struct cache_extent *cache)
7120 {
7121         struct block_group_record *rec;
7122
7123         rec = container_of(cache, struct block_group_record, cache);
7124         list_del_init(&rec->list);
7125         free(rec);
7126 }
7127
7128 void free_block_group_tree(struct block_group_tree *tree)
7129 {
7130         cache_tree_free_extents(&tree->tree, free_block_group_record);
7131 }
7132
7133 int insert_device_extent_record(struct device_extent_tree *tree,
7134                                 struct device_extent_record *de_rec)
7135 {
7136         int ret;
7137
7138         /*
7139          * Device extent is a bit different from the other extents, because
7140          * the extents which belong to the different devices may have the
7141          * same start and size, so we need use the special extent cache
7142          * search/insert functions.
7143          */
7144         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7145         if (ret)
7146                 return ret;
7147
7148         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7149         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7150         return 0;
7151 }
7152
7153 static void free_device_extent_record(struct cache_extent *cache)
7154 {
7155         struct device_extent_record *rec;
7156
7157         rec = container_of(cache, struct device_extent_record, cache);
7158         if (!list_empty(&rec->chunk_list))
7159                 list_del_init(&rec->chunk_list);
7160         if (!list_empty(&rec->device_list))
7161                 list_del_init(&rec->device_list);
7162         free(rec);
7163 }
7164
7165 void free_device_extent_tree(struct device_extent_tree *tree)
7166 {
7167         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7168 }
7169
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as tree
 * backrefs; all others are recorded as data backrefs carrying the v0 ref
 * count.  Returns the result of the add_*_backref() call.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
7190
7191 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7192                                             struct btrfs_key *key,
7193                                             int slot)
7194 {
7195         struct btrfs_chunk *ptr;
7196         struct chunk_record *rec;
7197         int num_stripes, i;
7198
7199         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7200         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7201
7202         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7203         if (!rec) {
7204                 fprintf(stderr, "memory allocation failed\n");
7205                 exit(-1);
7206         }
7207
7208         INIT_LIST_HEAD(&rec->list);
7209         INIT_LIST_HEAD(&rec->dextents);
7210         rec->bg_rec = NULL;
7211
7212         rec->cache.start = key->offset;
7213         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7214
7215         rec->generation = btrfs_header_generation(leaf);
7216
7217         rec->objectid = key->objectid;
7218         rec->type = key->type;
7219         rec->offset = key->offset;
7220
7221         rec->length = rec->cache.size;
7222         rec->owner = btrfs_chunk_owner(leaf, ptr);
7223         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7224         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7225         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7226         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7227         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7228         rec->num_stripes = num_stripes;
7229         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7230
7231         for (i = 0; i < rec->num_stripes; ++i) {
7232                 rec->stripes[i].devid =
7233                         btrfs_stripe_devid_nr(leaf, ptr, i);
7234                 rec->stripes[i].offset =
7235                         btrfs_stripe_offset_nr(leaf, ptr, i);
7236                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7237                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7238                                 BTRFS_UUID_SIZE);
7239         }
7240
7241         return rec;
7242 }
7243
7244 static int process_chunk_item(struct cache_tree *chunk_cache,
7245                               struct btrfs_key *key, struct extent_buffer *eb,
7246                               int slot)
7247 {
7248         struct chunk_record *rec;
7249         struct btrfs_chunk *chunk;
7250         int ret = 0;
7251
7252         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7253         /*
7254          * Do extra check for this chunk item,
7255          *
7256          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7257          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7258          * and owner<->key_type check.
7259          */
7260         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7261                                       key->offset);
7262         if (ret < 0) {
7263                 error("chunk(%llu, %llu) is not valid, ignore it",
7264                       key->offset, btrfs_chunk_length(eb, chunk));
7265                 return 0;
7266         }
7267         rec = btrfs_new_chunk_record(eb, key, slot);
7268         ret = insert_cache_extent(chunk_cache, &rec->cache);
7269         if (ret) {
7270                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7271                         rec->offset, rec->length);
7272                 free(rec);
7273         }
7274
7275         return ret;
7276 }
7277
7278 static int process_device_item(struct rb_root *dev_cache,
7279                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7280 {
7281         struct btrfs_dev_item *ptr;
7282         struct device_record *rec;
7283         int ret = 0;
7284
7285         ptr = btrfs_item_ptr(eb,
7286                 slot, struct btrfs_dev_item);
7287
7288         rec = malloc(sizeof(*rec));
7289         if (!rec) {
7290                 fprintf(stderr, "memory allocation failed\n");
7291                 return -ENOMEM;
7292         }
7293
7294         rec->devid = key->offset;
7295         rec->generation = btrfs_header_generation(eb);
7296
7297         rec->objectid = key->objectid;
7298         rec->type = key->type;
7299         rec->offset = key->offset;
7300
7301         rec->devid = btrfs_device_id(eb, ptr);
7302         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7303         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7304
7305         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7306         if (ret) {
7307                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7308                 free(rec);
7309         }
7310
7311         return ret;
7312 }
7313
7314 struct block_group_record *
7315 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7316                              int slot)
7317 {
7318         struct btrfs_block_group_item *ptr;
7319         struct block_group_record *rec;
7320
7321         rec = calloc(1, sizeof(*rec));
7322         if (!rec) {
7323                 fprintf(stderr, "memory allocation failed\n");
7324                 exit(-1);
7325         }
7326
7327         rec->cache.start = key->objectid;
7328         rec->cache.size = key->offset;
7329
7330         rec->generation = btrfs_header_generation(leaf);
7331
7332         rec->objectid = key->objectid;
7333         rec->type = key->type;
7334         rec->offset = key->offset;
7335
7336         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7337         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7338
7339         INIT_LIST_HEAD(&rec->list);
7340
7341         return rec;
7342 }
7343
7344 static int process_block_group_item(struct block_group_tree *block_group_cache,
7345                                     struct btrfs_key *key,
7346                                     struct extent_buffer *eb, int slot)
7347 {
7348         struct block_group_record *rec;
7349         int ret = 0;
7350
7351         rec = btrfs_new_block_group_record(eb, key, slot);
7352         ret = insert_block_group_record(block_group_cache, rec);
7353         if (ret) {
7354                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7355                         rec->objectid, rec->offset);
7356                 free(rec);
7357         }
7358
7359         return ret;
7360 }
7361
7362 struct device_extent_record *
7363 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7364                                struct btrfs_key *key, int slot)
7365 {
7366         struct device_extent_record *rec;
7367         struct btrfs_dev_extent *ptr;
7368
7369         rec = calloc(1, sizeof(*rec));
7370         if (!rec) {
7371                 fprintf(stderr, "memory allocation failed\n");
7372                 exit(-1);
7373         }
7374
7375         rec->cache.objectid = key->objectid;
7376         rec->cache.start = key->offset;
7377
7378         rec->generation = btrfs_header_generation(leaf);
7379
7380         rec->objectid = key->objectid;
7381         rec->type = key->type;
7382         rec->offset = key->offset;
7383
7384         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7385         rec->chunk_objecteid =
7386                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7387         rec->chunk_offset =
7388                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7389         rec->length = btrfs_dev_extent_length(leaf, ptr);
7390         rec->cache.size = rec->length;
7391
7392         INIT_LIST_HEAD(&rec->chunk_list);
7393         INIT_LIST_HEAD(&rec->device_list);
7394
7395         return rec;
7396 }
7397
7398 static int
7399 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7400                            struct btrfs_key *key, struct extent_buffer *eb,
7401                            int slot)
7402 {
7403         struct device_extent_record *rec;
7404         int ret;
7405
7406         rec = btrfs_new_device_extent_record(eb, key, slot);
7407         ret = insert_device_extent_record(dev_extent_cache, rec);
7408         if (ret) {
7409                 fprintf(stderr,
7410                         "Device extent[%llu, %llu, %llu] existed.\n",
7411                         rec->objectid, rec->offset, rec->length);
7412                 free(rec);
7413         }
7414
7415         return ret;
7416 }
7417
7418 static int process_extent_item(struct btrfs_root *root,
7419                                struct cache_tree *extent_cache,
7420                                struct extent_buffer *eb, int slot)
7421 {
7422         struct btrfs_extent_item *ei;
7423         struct btrfs_extent_inline_ref *iref;
7424         struct btrfs_extent_data_ref *dref;
7425         struct btrfs_shared_data_ref *sref;
7426         struct btrfs_key key;
7427         struct extent_record tmpl;
7428         unsigned long end;
7429         unsigned long ptr;
7430         int ret;
7431         int type;
7432         u32 item_size = btrfs_item_size_nr(eb, slot);
7433         u64 refs = 0;
7434         u64 offset;
7435         u64 num_bytes;
7436         int metadata = 0;
7437
7438         btrfs_item_key_to_cpu(eb, &key, slot);
7439
7440         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7441                 metadata = 1;
7442                 num_bytes = root->fs_info->nodesize;
7443         } else {
7444                 num_bytes = key.offset;
7445         }
7446
7447         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7448                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7449                       key.objectid, root->fs_info->sectorsize);
7450                 return -EIO;
7451         }
7452         if (item_size < sizeof(*ei)) {
7453 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7454                 struct btrfs_extent_item_v0 *ei0;
7455                 BUG_ON(item_size != sizeof(*ei0));
7456                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7457                 refs = btrfs_extent_refs_v0(eb, ei0);
7458 #else
7459                 BUG();
7460 #endif
7461                 memset(&tmpl, 0, sizeof(tmpl));
7462                 tmpl.start = key.objectid;
7463                 tmpl.nr = num_bytes;
7464                 tmpl.extent_item_refs = refs;
7465                 tmpl.metadata = metadata;
7466                 tmpl.found_rec = 1;
7467                 tmpl.max_size = num_bytes;
7468
7469                 return add_extent_rec(extent_cache, &tmpl);
7470         }
7471
7472         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7473         refs = btrfs_extent_refs(eb, ei);
7474         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7475                 metadata = 1;
7476         else
7477                 metadata = 0;
7478         if (metadata && num_bytes != root->fs_info->nodesize) {
7479                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7480                       num_bytes, root->fs_info->nodesize);
7481                 return -EIO;
7482         }
7483         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7484                 error("ignore invalid data extent, length %llu is not aligned to %u",
7485                       num_bytes, root->fs_info->sectorsize);
7486                 return -EIO;
7487         }
7488
7489         memset(&tmpl, 0, sizeof(tmpl));
7490         tmpl.start = key.objectid;
7491         tmpl.nr = num_bytes;
7492         tmpl.extent_item_refs = refs;
7493         tmpl.metadata = metadata;
7494         tmpl.found_rec = 1;
7495         tmpl.max_size = num_bytes;
7496         add_extent_rec(extent_cache, &tmpl);
7497
7498         ptr = (unsigned long)(ei + 1);
7499         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7500             key.type == BTRFS_EXTENT_ITEM_KEY)
7501                 ptr += sizeof(struct btrfs_tree_block_info);
7502
7503         end = (unsigned long)ei + item_size;
7504         while (ptr < end) {
7505                 iref = (struct btrfs_extent_inline_ref *)ptr;
7506                 type = btrfs_extent_inline_ref_type(eb, iref);
7507                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7508                 switch (type) {
7509                 case BTRFS_TREE_BLOCK_REF_KEY:
7510                         ret = add_tree_backref(extent_cache, key.objectid,
7511                                         0, offset, 0);
7512                         if (ret < 0)
7513                                 error(
7514                         "add_tree_backref failed (extent items tree block): %s",
7515                                       strerror(-ret));
7516                         break;
7517                 case BTRFS_SHARED_BLOCK_REF_KEY:
7518                         ret = add_tree_backref(extent_cache, key.objectid,
7519                                         offset, 0, 0);
7520                         if (ret < 0)
7521                                 error(
7522                         "add_tree_backref failed (extent items shared block): %s",
7523                                       strerror(-ret));
7524                         break;
7525                 case BTRFS_EXTENT_DATA_REF_KEY:
7526                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7527                         add_data_backref(extent_cache, key.objectid, 0,
7528                                         btrfs_extent_data_ref_root(eb, dref),
7529                                         btrfs_extent_data_ref_objectid(eb,
7530                                                                        dref),
7531                                         btrfs_extent_data_ref_offset(eb, dref),
7532                                         btrfs_extent_data_ref_count(eb, dref),
7533                                         0, num_bytes);
7534                         break;
7535                 case BTRFS_SHARED_DATA_REF_KEY:
7536                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7537                         add_data_backref(extent_cache, key.objectid, offset,
7538                                         0, 0, 0,
7539                                         btrfs_shared_data_ref_count(eb, sref),
7540                                         0, num_bytes);
7541                         break;
7542                 default:
7543                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7544                                 key.objectid, key.type, num_bytes);
7545                         goto out;
7546                 }
7547                 ptr += btrfs_extent_inline_ref_size(type);
7548         }
7549         WARN_ON(ptr > end);
7550 out:
7551         return 0;
7552 }
7553
7554 static int check_cache_range(struct btrfs_root *root,
7555                              struct btrfs_block_group_cache *cache,
7556                              u64 offset, u64 bytes)
7557 {
7558         struct btrfs_free_space *entry;
7559         u64 *logical;
7560         u64 bytenr;
7561         int stripe_len;
7562         int i, nr, ret;
7563
7564         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7565                 bytenr = btrfs_sb_offset(i);
7566                 ret = btrfs_rmap_block(root->fs_info,
7567                                        cache->key.objectid, bytenr, 0,
7568                                        &logical, &nr, &stripe_len);
7569                 if (ret)
7570                         return ret;
7571
7572                 while (nr--) {
7573                         if (logical[nr] + stripe_len <= offset)
7574                                 continue;
7575                         if (offset + bytes <= logical[nr])
7576                                 continue;
7577                         if (logical[nr] == offset) {
7578                                 if (stripe_len >= bytes) {
7579                                         free(logical);
7580                                         return 0;
7581                                 }
7582                                 bytes -= stripe_len;
7583                                 offset += stripe_len;
7584                         } else if (logical[nr] < offset) {
7585                                 if (logical[nr] + stripe_len >=
7586                                     offset + bytes) {
7587                                         free(logical);
7588                                         return 0;
7589                                 }
7590                                 bytes = (offset + bytes) -
7591                                         (logical[nr] + stripe_len);
7592                                 offset = logical[nr] + stripe_len;
7593                         } else {
7594                                 /*
7595                                  * Could be tricky, the super may land in the
7596                                  * middle of the area we're checking.  First
7597                                  * check the easiest case, it's at the end.
7598                                  */
7599                                 if (logical[nr] + stripe_len >=
7600                                     bytes + offset) {
7601                                         bytes = logical[nr] - offset;
7602                                         continue;
7603                                 }
7604
7605                                 /* Check the left side */
7606                                 ret = check_cache_range(root, cache,
7607                                                         offset,
7608                                                         logical[nr] - offset);
7609                                 if (ret) {
7610                                         free(logical);
7611                                         return ret;
7612                                 }
7613
7614                                 /* Now we continue with the right side */
7615                                 bytes = (offset + bytes) -
7616                                         (logical[nr] + stripe_len);
7617                                 offset = logical[nr] + stripe_len;
7618                         }
7619                 }
7620
7621                 free(logical);
7622         }
7623
7624         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7625         if (!entry) {
7626                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7627                         offset, offset+bytes);
7628                 return -EINVAL;
7629         }
7630
7631         if (entry->offset != offset) {
7632                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7633                         entry->offset);
7634                 return -EINVAL;
7635         }
7636
7637         if (entry->bytes != bytes) {
7638                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7639                         bytes, entry->bytes, offset);
7640                 return -EINVAL;
7641         }
7642
7643         unlink_free_space(cache->free_space_ctl, entry);
7644         free(entry);
7645         return 0;
7646 }
7647
7648 static int verify_space_cache(struct btrfs_root *root,
7649                               struct btrfs_block_group_cache *cache)
7650 {
7651         struct btrfs_path path;
7652         struct extent_buffer *leaf;
7653         struct btrfs_key key;
7654         u64 last;
7655         int ret = 0;
7656
7657         root = root->fs_info->extent_root;
7658
7659         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7660
7661         btrfs_init_path(&path);
7662         key.objectid = last;
7663         key.offset = 0;
7664         key.type = BTRFS_EXTENT_ITEM_KEY;
7665         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7666         if (ret < 0)
7667                 goto out;
7668         ret = 0;
7669         while (1) {
7670                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7671                         ret = btrfs_next_leaf(root, &path);
7672                         if (ret < 0)
7673                                 goto out;
7674                         if (ret > 0) {
7675                                 ret = 0;
7676                                 break;
7677                         }
7678                 }
7679                 leaf = path.nodes[0];
7680                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7681                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7682                         break;
7683                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7684                     key.type != BTRFS_METADATA_ITEM_KEY) {
7685                         path.slots[0]++;
7686                         continue;
7687                 }
7688
7689                 if (last == key.objectid) {
7690                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7691                                 last = key.objectid + key.offset;
7692                         else
7693                                 last = key.objectid + root->fs_info->nodesize;
7694                         path.slots[0]++;
7695                         continue;
7696                 }
7697
7698                 ret = check_cache_range(root, cache, last,
7699                                         key.objectid - last);
7700                 if (ret)
7701                         break;
7702                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7703                         last = key.objectid + key.offset;
7704                 else
7705                         last = key.objectid + root->fs_info->nodesize;
7706                 path.slots[0]++;
7707         }
7708
7709         if (last < cache->key.objectid + cache->key.offset)
7710                 ret = check_cache_range(root, cache, last,
7711                                         cache->key.objectid +
7712                                         cache->key.offset - last);
7713
7714 out:
7715         btrfs_release_path(&path);
7716
7717         if (!ret &&
7718             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7719                 fprintf(stderr, "There are still entries left in the space "
7720                         "cache\n");
7721                 ret = -EINVAL;
7722         }
7723
7724         return ret;
7725 }
7726
7727 static int check_space_cache(struct btrfs_root *root)
7728 {
7729         struct btrfs_block_group_cache *cache;
7730         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7731         int ret;
7732         int error = 0;
7733
7734         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7735             btrfs_super_generation(root->fs_info->super_copy) !=
7736             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7737                 printf("cache and super generation don't match, space cache "
7738                        "will be invalidated\n");
7739                 return 0;
7740         }
7741
7742         if (ctx.progress_enabled) {
7743                 ctx.tp = TASK_FREE_SPACE;
7744                 task_start(ctx.info);
7745         }
7746
7747         while (1) {
7748                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7749                 if (!cache)
7750                         break;
7751
7752                 start = cache->key.objectid + cache->key.offset;
7753                 if (!cache->free_space_ctl) {
7754                         if (btrfs_init_free_space_ctl(cache,
7755                                                 root->fs_info->sectorsize)) {
7756                                 ret = -ENOMEM;
7757                                 break;
7758                         }
7759                 } else {
7760                         btrfs_remove_free_space_cache(cache);
7761                 }
7762
7763                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7764                         ret = exclude_super_stripes(root, cache);
7765                         if (ret) {
7766                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7767                                         strerror(-ret));
7768                                 error++;
7769                                 continue;
7770                         }
7771                         ret = load_free_space_tree(root->fs_info, cache);
7772                         free_excluded_extents(root, cache);
7773                         if (ret < 0) {
7774                                 fprintf(stderr, "could not load free space tree: %s\n",
7775                                         strerror(-ret));
7776                                 error++;
7777                                 continue;
7778                         }
7779                         error += ret;
7780                 } else {
7781                         ret = load_free_space_cache(root->fs_info, cache);
7782                         if (!ret)
7783                                 continue;
7784                 }
7785
7786                 ret = verify_space_cache(root, cache);
7787                 if (ret) {
7788                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7789                                 cache->key.objectid);
7790                         error++;
7791                 }
7792         }
7793
7794         task_stop(ctx.info);
7795
7796         return error ? -EINVAL : 0;
7797 }
7798
7799 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7800                         u64 num_bytes, unsigned long leaf_offset,
7801                         struct extent_buffer *eb) {
7802
7803         struct btrfs_fs_info *fs_info = root->fs_info;
7804         u64 offset = 0;
7805         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7806         char *data;
7807         unsigned long csum_offset;
7808         u32 csum;
7809         u32 csum_expected;
7810         u64 read_len;
7811         u64 data_checked = 0;
7812         u64 tmp;
7813         int ret = 0;
7814         int mirror;
7815         int num_copies;
7816
7817         if (num_bytes % fs_info->sectorsize)
7818                 return -EINVAL;
7819
7820         data = malloc(num_bytes);
7821         if (!data)
7822                 return -ENOMEM;
7823
7824         while (offset < num_bytes) {
7825                 mirror = 0;
7826 again:
7827                 read_len = num_bytes - offset;
7828                 /* read as much space once a time */
7829                 ret = read_extent_data(fs_info, data + offset,
7830                                 bytenr + offset, &read_len, mirror);
7831                 if (ret)
7832                         goto out;
7833                 data_checked = 0;
7834                 /* verify every 4k data's checksum */
7835                 while (data_checked < read_len) {
7836                         csum = ~(u32)0;
7837                         tmp = offset + data_checked;
7838
7839                         csum = btrfs_csum_data((char *)data + tmp,
7840                                                csum, fs_info->sectorsize);
7841                         btrfs_csum_final(csum, (u8 *)&csum);
7842
7843                         csum_offset = leaf_offset +
7844                                  tmp / fs_info->sectorsize * csum_size;
7845                         read_extent_buffer(eb, (char *)&csum_expected,
7846                                            csum_offset, csum_size);
7847                         /* try another mirror */
7848                         if (csum != csum_expected) {
7849                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7850                                                 mirror, bytenr + tmp,
7851                                                 csum, csum_expected);
7852                                 num_copies = btrfs_num_copies(root->fs_info,
7853                                                 bytenr, num_bytes);
7854                                 if (mirror < num_copies - 1) {
7855                                         mirror += 1;
7856                                         goto again;
7857                                 }
7858                         }
7859                         data_checked += fs_info->sectorsize;
7860                 }
7861                 offset += read_len;
7862         }
7863 out:
7864         free(data);
7865         return ret;
7866 }
7867
7868 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7869                                u64 num_bytes)
7870 {
7871         struct btrfs_path path;
7872         struct extent_buffer *leaf;
7873         struct btrfs_key key;
7874         int ret;
7875
7876         btrfs_init_path(&path);
7877         key.objectid = bytenr;
7878         key.type = BTRFS_EXTENT_ITEM_KEY;
7879         key.offset = (u64)-1;
7880
7881 again:
7882         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7883                                 0, 0);
7884         if (ret < 0) {
7885                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7886                 btrfs_release_path(&path);
7887                 return ret;
7888         } else if (ret) {
7889                 if (path.slots[0] > 0) {
7890                         path.slots[0]--;
7891                 } else {
7892                         ret = btrfs_prev_leaf(root, &path);
7893                         if (ret < 0) {
7894                                 goto out;
7895                         } else if (ret > 0) {
7896                                 ret = 0;
7897                                 goto out;
7898                         }
7899                 }
7900         }
7901
7902         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7903
7904         /*
7905          * Block group items come before extent items if they have the same
7906          * bytenr, so walk back one more just in case.  Dear future traveller,
7907          * first congrats on mastering time travel.  Now if it's not too much
7908          * trouble could you go back to 2006 and tell Chris to make the
7909          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7910          * EXTENT_ITEM_KEY please?
7911          */
7912         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7913                 if (path.slots[0] > 0) {
7914                         path.slots[0]--;
7915                 } else {
7916                         ret = btrfs_prev_leaf(root, &path);
7917                         if (ret < 0) {
7918                                 goto out;
7919                         } else if (ret > 0) {
7920                                 ret = 0;
7921                                 goto out;
7922                         }
7923                 }
7924                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7925         }
7926
7927         while (num_bytes) {
7928                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7929                         ret = btrfs_next_leaf(root, &path);
7930                         if (ret < 0) {
7931                                 fprintf(stderr, "Error going to next leaf "
7932                                         "%d\n", ret);
7933                                 btrfs_release_path(&path);
7934                                 return ret;
7935                         } else if (ret) {
7936                                 break;
7937                         }
7938                 }
7939                 leaf = path.nodes[0];
7940                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7941                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7942                         path.slots[0]++;
7943                         continue;
7944                 }
7945                 if (key.objectid + key.offset < bytenr) {
7946                         path.slots[0]++;
7947                         continue;
7948                 }
7949                 if (key.objectid > bytenr + num_bytes)
7950                         break;
7951
7952                 if (key.objectid == bytenr) {
7953                         if (key.offset >= num_bytes) {
7954                                 num_bytes = 0;
7955                                 break;
7956                         }
7957                         num_bytes -= key.offset;
7958                         bytenr += key.offset;
7959                 } else if (key.objectid < bytenr) {
7960                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7961                                 num_bytes = 0;
7962                                 break;
7963                         }
7964                         num_bytes = (bytenr + num_bytes) -
7965                                 (key.objectid + key.offset);
7966                         bytenr = key.objectid + key.offset;
7967                 } else {
7968                         if (key.objectid + key.offset < bytenr + num_bytes) {
7969                                 u64 new_start = key.objectid + key.offset;
7970                                 u64 new_bytes = bytenr + num_bytes - new_start;
7971
7972                                 /*
7973                                  * Weird case, the extent is in the middle of
7974                                  * our range, we'll have to search one side
7975                                  * and then the other.  Not sure if this happens
7976                                  * in real life, but no harm in coding it up
7977                                  * anyway just in case.
7978                                  */
7979                                 btrfs_release_path(&path);
7980                                 ret = check_extent_exists(root, new_start,
7981                                                           new_bytes);
7982                                 if (ret) {
7983                                         fprintf(stderr, "Right section didn't "
7984                                                 "have a record\n");
7985                                         break;
7986                                 }
7987                                 num_bytes = key.objectid - bytenr;
7988                                 goto again;
7989                         }
7990                         num_bytes = key.objectid - bytenr;
7991                 }
7992                 path.slots[0]++;
7993         }
7994         ret = 0;
7995
7996 out:
7997         if (num_bytes && !ret) {
7998                 fprintf(stderr, "There are no extents for csum range "
7999                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8000                 ret = 1;
8001         }
8002
8003         btrfs_release_path(&path);
8004         return ret;
8005 }
8006
8007 static int check_csums(struct btrfs_root *root)
8008 {
8009         struct btrfs_path path;
8010         struct extent_buffer *leaf;
8011         struct btrfs_key key;
8012         u64 offset = 0, num_bytes = 0;
8013         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8014         int errors = 0;
8015         int ret;
8016         u64 data_len;
8017         unsigned long leaf_offset;
8018
8019         root = root->fs_info->csum_root;
8020         if (!extent_buffer_uptodate(root->node)) {
8021                 fprintf(stderr, "No valid csum tree found\n");
8022                 return -ENOENT;
8023         }
8024
8025         btrfs_init_path(&path);
8026         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8027         key.type = BTRFS_EXTENT_CSUM_KEY;
8028         key.offset = 0;
8029         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8030         if (ret < 0) {
8031                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8032                 btrfs_release_path(&path);
8033                 return ret;
8034         }
8035
8036         if (ret > 0 && path.slots[0])
8037                 path.slots[0]--;
8038         ret = 0;
8039
8040         while (1) {
8041                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8042                         ret = btrfs_next_leaf(root, &path);
8043                         if (ret < 0) {
8044                                 fprintf(stderr, "Error going to next leaf "
8045                                         "%d\n", ret);
8046                                 break;
8047                         }
8048                         if (ret)
8049                                 break;
8050                 }
8051                 leaf = path.nodes[0];
8052
8053                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8054                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8055                         path.slots[0]++;
8056                         continue;
8057                 }
8058
8059                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8060                               csum_size) * root->fs_info->sectorsize;
8061                 if (!check_data_csum)
8062                         goto skip_csum_check;
8063                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8064                 ret = check_extent_csums(root, key.offset, data_len,
8065                                          leaf_offset, leaf);
8066                 if (ret)
8067                         break;
8068 skip_csum_check:
8069                 if (!num_bytes) {
8070                         offset = key.offset;
8071                 } else if (key.offset != offset + num_bytes) {
8072                         ret = check_extent_exists(root, offset, num_bytes);
8073                         if (ret) {
8074                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8075                                         "there is no extent record\n",
8076                                         offset, offset+num_bytes);
8077                                 errors++;
8078                         }
8079                         offset = key.offset;
8080                         num_bytes = 0;
8081                 }
8082                 num_bytes += data_len;
8083                 path.slots[0]++;
8084         }
8085
8086         btrfs_release_path(&path);
8087         return errors;
8088 }
8089
8090 static int is_dropped_key(struct btrfs_key *key,
8091                           struct btrfs_key *drop_key) {
8092         if (key->objectid < drop_key->objectid)
8093                 return 1;
8094         else if (key->objectid == drop_key->objectid) {
8095                 if (key->type < drop_key->type)
8096                         return 1;
8097                 else if (key->type == drop_key->type) {
8098                         if (key->offset < drop_key->offset)
8099                                 return 1;
8100                 }
8101         }
8102         return 0;
8103 }
8104
8105 /*
8106  * Here are the rules for FULL_BACKREF.
8107  *
8108  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8109  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8110  *      FULL_BACKREF set.
8111  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8112  *    if it happened after the relocation occurred since we'll have dropped the
8113  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8114  *    have no real way to know for sure.
8115  *
8116  * We process the blocks one root at a time, and we start from the lowest root
8117  * objectid and go to the highest.  So we can just lookup the owner backref for
8118  * the record and if we don't find it then we know it doesn't exist and we have
8119  * a FULL BACKREF.
8120  *
8121  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8122  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8123  * be set or not and then we can check later once we've gathered all the refs.
8124  */
8125 static int calc_extent_flag(struct cache_tree *extent_cache,
8126                            struct extent_buffer *buf,
8127                            struct root_item_record *ri,
8128                            u64 *flags)
8129 {
8130         struct extent_record *rec;
8131         struct cache_extent *cache;
8132         struct tree_backref *tback;
8133         u64 owner = 0;
8134
8135         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8136         /* we have added this extent before */
8137         if (!cache)
8138                 return -ENOENT;
8139
8140         rec = container_of(cache, struct extent_record, cache);
8141
8142         /*
8143          * Except file/reloc tree, we can not have
8144          * FULL BACKREF MODE
8145          */
8146         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8147                 goto normal;
8148         /*
8149          * root node
8150          */
8151         if (buf->start == ri->bytenr)
8152                 goto normal;
8153
8154         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8155                 goto full_backref;
8156
8157         owner = btrfs_header_owner(buf);
8158         if (owner == ri->objectid)
8159                 goto normal;
8160
8161         tback = find_tree_backref(rec, 0, owner);
8162         if (!tback)
8163                 goto full_backref;
8164 normal:
8165         *flags = 0;
8166         if (rec->flag_block_full_backref != FLAG_UNSET &&
8167             rec->flag_block_full_backref != 0)
8168                 rec->bad_full_backref = 1;
8169         return 0;
8170 full_backref:
8171         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8172         if (rec->flag_block_full_backref != FLAG_UNSET &&
8173             rec->flag_block_full_backref != 1)
8174                 rec->bad_full_backref = 1;
8175         return 0;
8176 }
8177
8178 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8179 {
8180         fprintf(stderr, "Invalid key type(");
8181         print_key_type(stderr, 0, key_type);
8182         fprintf(stderr, ") found in root(");
8183         print_objectid(stderr, rootid, 0);
8184         fprintf(stderr, ")\n");
8185 }
8186
8187 /*
8188  * Check if the key is valid with its extent buffer.
8189  *
8190  * This is a early check in case invalid key exists in a extent buffer
8191  * This is not comprehensive yet, but should prevent wrong key/item passed
8192  * further
8193  */
8194 static int check_type_with_root(u64 rootid, u8 key_type)
8195 {
8196         switch (key_type) {
8197         /* Only valid in chunk tree */
8198         case BTRFS_DEV_ITEM_KEY:
8199         case BTRFS_CHUNK_ITEM_KEY:
8200                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8201                         goto err;
8202                 break;
8203         /* valid in csum and log tree */
8204         case BTRFS_CSUM_TREE_OBJECTID:
8205                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8206                       is_fstree(rootid)))
8207                         goto err;
8208                 break;
8209         case BTRFS_EXTENT_ITEM_KEY:
8210         case BTRFS_METADATA_ITEM_KEY:
8211         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8212                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8213                         goto err;
8214                 break;
8215         case BTRFS_ROOT_ITEM_KEY:
8216                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8217                         goto err;
8218                 break;
8219         case BTRFS_DEV_EXTENT_KEY:
8220                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8221                         goto err;
8222                 break;
8223         }
8224         return 0;
8225 err:
8226         report_mismatch_key_root(key_type, rootid);
8227         return -EINVAL;
8228 }
8229
8230 static int run_next_block(struct btrfs_root *root,
8231                           struct block_info *bits,
8232                           int bits_nr,
8233                           u64 *last,
8234                           struct cache_tree *pending,
8235                           struct cache_tree *seen,
8236                           struct cache_tree *reada,
8237                           struct cache_tree *nodes,
8238                           struct cache_tree *extent_cache,
8239                           struct cache_tree *chunk_cache,
8240                           struct rb_root *dev_cache,
8241                           struct block_group_tree *block_group_cache,
8242                           struct device_extent_tree *dev_extent_cache,
8243                           struct root_item_record *ri)
8244 {
8245         struct btrfs_fs_info *fs_info = root->fs_info;
8246         struct extent_buffer *buf;
8247         struct extent_record *rec = NULL;
8248         u64 bytenr;
8249         u32 size;
8250         u64 parent;
8251         u64 owner;
8252         u64 flags;
8253         u64 ptr;
8254         u64 gen = 0;
8255         int ret = 0;
8256         int i;
8257         int nritems;
8258         struct btrfs_key key;
8259         struct cache_extent *cache;
8260         int reada_bits;
8261
8262         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8263                                     bits_nr, &reada_bits);
8264         if (nritems == 0)
8265                 return 1;
8266
8267         if (!reada_bits) {
8268                 for(i = 0; i < nritems; i++) {
8269                         ret = add_cache_extent(reada, bits[i].start,
8270                                                bits[i].size);
8271                         if (ret == -EEXIST)
8272                                 continue;
8273
8274                         /* fixme, get the parent transid */
8275                         readahead_tree_block(fs_info, bits[i].start, 0);
8276                 }
8277         }
8278         *last = bits[0].start;
8279         bytenr = bits[0].start;
8280         size = bits[0].size;
8281
8282         cache = lookup_cache_extent(pending, bytenr, size);
8283         if (cache) {
8284                 remove_cache_extent(pending, cache);
8285                 free(cache);
8286         }
8287         cache = lookup_cache_extent(reada, bytenr, size);
8288         if (cache) {
8289                 remove_cache_extent(reada, cache);
8290                 free(cache);
8291         }
8292         cache = lookup_cache_extent(nodes, bytenr, size);
8293         if (cache) {
8294                 remove_cache_extent(nodes, cache);
8295                 free(cache);
8296         }
8297         cache = lookup_cache_extent(extent_cache, bytenr, size);
8298         if (cache) {
8299                 rec = container_of(cache, struct extent_record, cache);
8300                 gen = rec->parent_generation;
8301         }
8302
8303         /* fixme, get the real parent transid */
8304         buf = read_tree_block(root->fs_info, bytenr, gen);
8305         if (!extent_buffer_uptodate(buf)) {
8306                 record_bad_block_io(root->fs_info,
8307                                     extent_cache, bytenr, size);
8308                 goto out;
8309         }
8310
8311         nritems = btrfs_header_nritems(buf);
8312
8313         flags = 0;
8314         if (!init_extent_tree) {
8315                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8316                                        btrfs_header_level(buf), 1, NULL,
8317                                        &flags);
8318                 if (ret < 0) {
8319                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8320                         if (ret < 0) {
8321                                 fprintf(stderr, "Couldn't calc extent flags\n");
8322                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8323                         }
8324                 }
8325         } else {
8326                 flags = 0;
8327                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8328                 if (ret < 0) {
8329                         fprintf(stderr, "Couldn't calc extent flags\n");
8330                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8331                 }
8332         }
8333
8334         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8335                 if (ri != NULL &&
8336                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8337                     ri->objectid == btrfs_header_owner(buf)) {
8338                         /*
8339                          * Ok we got to this block from it's original owner and
8340                          * we have FULL_BACKREF set.  Relocation can leave
8341                          * converted blocks over so this is altogether possible,
8342                          * however it's not possible if the generation > the
8343                          * last snapshot, so check for this case.
8344                          */
8345                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8346                             btrfs_header_generation(buf) > ri->last_snapshot) {
8347                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8348                                 rec->bad_full_backref = 1;
8349                         }
8350                 }
8351         } else {
8352                 if (ri != NULL &&
8353                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8354                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8355                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8356                         rec->bad_full_backref = 1;
8357                 }
8358         }
8359
8360         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8361                 rec->flag_block_full_backref = 1;
8362                 parent = bytenr;
8363                 owner = 0;
8364         } else {
8365                 rec->flag_block_full_backref = 0;
8366                 parent = 0;
8367                 owner = btrfs_header_owner(buf);
8368         }
8369
8370         ret = check_block(root, extent_cache, buf, flags);
8371         if (ret)
8372                 goto out;
8373
8374         if (btrfs_is_leaf(buf)) {
8375                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8376                 for (i = 0; i < nritems; i++) {
8377                         struct btrfs_file_extent_item *fi;
8378                         btrfs_item_key_to_cpu(buf, &key, i);
8379                         /*
8380                          * Check key type against the leaf owner.
8381                          * Could filter quite a lot of early error if
8382                          * owner is correct
8383                          */
8384                         if (check_type_with_root(btrfs_header_owner(buf),
8385                                                  key.type)) {
8386                                 fprintf(stderr, "ignoring invalid key\n");
8387                                 continue;
8388                         }
8389                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8390                                 process_extent_item(root, extent_cache, buf,
8391                                                     i);
8392                                 continue;
8393                         }
8394                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8395                                 process_extent_item(root, extent_cache, buf,
8396                                                     i);
8397                                 continue;
8398                         }
8399                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8400                                 total_csum_bytes +=
8401                                         btrfs_item_size_nr(buf, i);
8402                                 continue;
8403                         }
8404                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8405                                 process_chunk_item(chunk_cache, &key, buf, i);
8406                                 continue;
8407                         }
8408                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8409                                 process_device_item(dev_cache, &key, buf, i);
8410                                 continue;
8411                         }
8412                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8413                                 process_block_group_item(block_group_cache,
8414                                         &key, buf, i);
8415                                 continue;
8416                         }
8417                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8418                                 process_device_extent_item(dev_extent_cache,
8419                                         &key, buf, i);
8420                                 continue;
8421
8422                         }
8423                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8424 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8425                                 process_extent_ref_v0(extent_cache, buf, i);
8426 #else
8427                                 BUG();
8428 #endif
8429                                 continue;
8430                         }
8431
8432                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8433                                 ret = add_tree_backref(extent_cache,
8434                                                 key.objectid, 0, key.offset, 0);
8435                                 if (ret < 0)
8436                                         error(
8437                                 "add_tree_backref failed (leaf tree block): %s",
8438                                               strerror(-ret));
8439                                 continue;
8440                         }
8441                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8442                                 ret = add_tree_backref(extent_cache,
8443                                                 key.objectid, key.offset, 0, 0);
8444                                 if (ret < 0)
8445                                         error(
8446                                 "add_tree_backref failed (leaf shared block): %s",
8447                                               strerror(-ret));
8448                                 continue;
8449                         }
8450                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8451                                 struct btrfs_extent_data_ref *ref;
8452                                 ref = btrfs_item_ptr(buf, i,
8453                                                 struct btrfs_extent_data_ref);
8454                                 add_data_backref(extent_cache,
8455                                         key.objectid, 0,
8456                                         btrfs_extent_data_ref_root(buf, ref),
8457                                         btrfs_extent_data_ref_objectid(buf,
8458                                                                        ref),
8459                                         btrfs_extent_data_ref_offset(buf, ref),
8460                                         btrfs_extent_data_ref_count(buf, ref),
8461                                         0, root->fs_info->sectorsize);
8462                                 continue;
8463                         }
8464                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8465                                 struct btrfs_shared_data_ref *ref;
8466                                 ref = btrfs_item_ptr(buf, i,
8467                                                 struct btrfs_shared_data_ref);
8468                                 add_data_backref(extent_cache,
8469                                         key.objectid, key.offset, 0, 0, 0,
8470                                         btrfs_shared_data_ref_count(buf, ref),
8471                                         0, root->fs_info->sectorsize);
8472                                 continue;
8473                         }
8474                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8475                                 struct bad_item *bad;
8476
8477                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8478                                         continue;
8479                                 if (!owner)
8480                                         continue;
8481                                 bad = malloc(sizeof(struct bad_item));
8482                                 if (!bad)
8483                                         continue;
8484                                 INIT_LIST_HEAD(&bad->list);
8485                                 memcpy(&bad->key, &key,
8486                                        sizeof(struct btrfs_key));
8487                                 bad->root_id = owner;
8488                                 list_add_tail(&bad->list, &delete_items);
8489                                 continue;
8490                         }
8491                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8492                                 continue;
8493                         fi = btrfs_item_ptr(buf, i,
8494                                             struct btrfs_file_extent_item);
8495                         if (btrfs_file_extent_type(buf, fi) ==
8496                             BTRFS_FILE_EXTENT_INLINE)
8497                                 continue;
8498                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8499                                 continue;
8500
8501                         data_bytes_allocated +=
8502                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8503                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8504                                 abort();
8505                         }
8506                         data_bytes_referenced +=
8507                                 btrfs_file_extent_num_bytes(buf, fi);
8508                         add_data_backref(extent_cache,
8509                                 btrfs_file_extent_disk_bytenr(buf, fi),
8510                                 parent, owner, key.objectid, key.offset -
8511                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8512                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8513                 }
8514         } else {
8515                 int level;
8516                 struct btrfs_key first_key;
8517
8518                 first_key.objectid = 0;
8519
8520                 if (nritems > 0)
8521                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8522                 level = btrfs_header_level(buf);
8523                 for (i = 0; i < nritems; i++) {
8524                         struct extent_record tmpl;
8525
8526                         ptr = btrfs_node_blockptr(buf, i);
8527                         size = root->fs_info->nodesize;
8528                         btrfs_node_key_to_cpu(buf, &key, i);
8529                         if (ri != NULL) {
8530                                 if ((level == ri->drop_level)
8531                                     && is_dropped_key(&key, &ri->drop_key)) {
8532                                         continue;
8533                                 }
8534                         }
8535
8536                         memset(&tmpl, 0, sizeof(tmpl));
8537                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8538                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8539                         tmpl.start = ptr;
8540                         tmpl.nr = size;
8541                         tmpl.refs = 1;
8542                         tmpl.metadata = 1;
8543                         tmpl.max_size = size;
8544                         ret = add_extent_rec(extent_cache, &tmpl);
8545                         if (ret < 0)
8546                                 goto out;
8547
8548                         ret = add_tree_backref(extent_cache, ptr, parent,
8549                                         owner, 1);
8550                         if (ret < 0) {
8551                                 error(
8552                                 "add_tree_backref failed (non-leaf block): %s",
8553                                       strerror(-ret));
8554                                 continue;
8555                         }
8556
8557                         if (level > 1) {
8558                                 add_pending(nodes, seen, ptr, size);
8559                         } else {
8560                                 add_pending(pending, seen, ptr, size);
8561                         }
8562                 }
8563                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8564                                       nritems) * sizeof(struct btrfs_key_ptr);
8565         }
8566         total_btree_bytes += buf->len;
8567         if (fs_root_objectid(btrfs_header_owner(buf)))
8568                 total_fs_tree_bytes += buf->len;
8569         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8570                 total_extent_tree_bytes += buf->len;
8571 out:
8572         free_extent_buffer(buf);
8573         return ret;
8574 }
8575
8576 static int add_root_to_pending(struct extent_buffer *buf,
8577                                struct cache_tree *extent_cache,
8578                                struct cache_tree *pending,
8579                                struct cache_tree *seen,
8580                                struct cache_tree *nodes,
8581                                u64 objectid)
8582 {
8583         struct extent_record tmpl;
8584         int ret;
8585
8586         if (btrfs_header_level(buf) > 0)
8587                 add_pending(nodes, seen, buf->start, buf->len);
8588         else
8589                 add_pending(pending, seen, buf->start, buf->len);
8590
8591         memset(&tmpl, 0, sizeof(tmpl));
8592         tmpl.start = buf->start;
8593         tmpl.nr = buf->len;
8594         tmpl.is_root = 1;
8595         tmpl.refs = 1;
8596         tmpl.metadata = 1;
8597         tmpl.max_size = buf->len;
8598         add_extent_rec(extent_cache, &tmpl);
8599
8600         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8601             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8602                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8603                                 0, 1);
8604         else
8605                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8606                                 1);
8607         return ret;
8608 }
8609
8610 /* as we fix the tree, we might be deleting blocks that
8611  * we're tracking for repair.  This hook makes sure we
8612  * remove any backrefs for blocks as we are fixing them.
8613  */
8614 static int free_extent_hook(struct btrfs_trans_handle *trans,
8615                             struct btrfs_root *root,
8616                             u64 bytenr, u64 num_bytes, u64 parent,
8617                             u64 root_objectid, u64 owner, u64 offset,
8618                             int refs_to_drop)
8619 {
8620         struct extent_record *rec;
8621         struct cache_extent *cache;
8622         int is_data;
8623         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8624
8625         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8626         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8627         if (!cache)
8628                 return 0;
8629
8630         rec = container_of(cache, struct extent_record, cache);
8631         if (is_data) {
8632                 struct data_backref *back;
8633                 back = find_data_backref(rec, parent, root_objectid, owner,
8634                                          offset, 1, bytenr, num_bytes);
8635                 if (!back)
8636                         goto out;
8637                 if (back->node.found_ref) {
8638                         back->found_ref -= refs_to_drop;
8639                         if (rec->refs)
8640                                 rec->refs -= refs_to_drop;
8641                 }
8642                 if (back->node.found_extent_tree) {
8643                         back->num_refs -= refs_to_drop;
8644                         if (rec->extent_item_refs)
8645                                 rec->extent_item_refs -= refs_to_drop;
8646                 }
8647                 if (back->found_ref == 0)
8648                         back->node.found_ref = 0;
8649                 if (back->num_refs == 0)
8650                         back->node.found_extent_tree = 0;
8651
8652                 if (!back->node.found_extent_tree && back->node.found_ref) {
8653                         rb_erase(&back->node.node, &rec->backref_tree);
8654                         free(back);
8655                 }
8656         } else {
8657                 struct tree_backref *back;
8658                 back = find_tree_backref(rec, parent, root_objectid);
8659                 if (!back)
8660                         goto out;
8661                 if (back->node.found_ref) {
8662                         if (rec->refs)
8663                                 rec->refs--;
8664                         back->node.found_ref = 0;
8665                 }
8666                 if (back->node.found_extent_tree) {
8667                         if (rec->extent_item_refs)
8668                                 rec->extent_item_refs--;
8669                         back->node.found_extent_tree = 0;
8670                 }
8671                 if (!back->node.found_extent_tree && back->node.found_ref) {
8672                         rb_erase(&back->node.node, &rec->backref_tree);
8673                         free(back);
8674                 }
8675         }
8676         maybe_free_extent_rec(extent_cache, rec);
8677 out:
8678         return 0;
8679 }
8680
8681 static int delete_extent_records(struct btrfs_trans_handle *trans,
8682                                  struct btrfs_root *root,
8683                                  struct btrfs_path *path,
8684                                  u64 bytenr)
8685 {
8686         struct btrfs_key key;
8687         struct btrfs_key found_key;
8688         struct extent_buffer *leaf;
8689         int ret;
8690         int slot;
8691
8692
8693         key.objectid = bytenr;
8694         key.type = (u8)-1;
8695         key.offset = (u64)-1;
8696
8697         while(1) {
8698                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8699                                         &key, path, 0, 1);
8700                 if (ret < 0)
8701                         break;
8702
8703                 if (ret > 0) {
8704                         ret = 0;
8705                         if (path->slots[0] == 0)
8706                                 break;
8707                         path->slots[0]--;
8708                 }
8709                 ret = 0;
8710
8711                 leaf = path->nodes[0];
8712                 slot = path->slots[0];
8713
8714                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8715                 if (found_key.objectid != bytenr)
8716                         break;
8717
8718                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8719                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8720                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8721                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8722                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8723                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8724                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8725                         btrfs_release_path(path);
8726                         if (found_key.type == 0) {
8727                                 if (found_key.offset == 0)
8728                                         break;
8729                                 key.offset = found_key.offset - 1;
8730                                 key.type = found_key.type;
8731                         }
8732                         key.type = found_key.type - 1;
8733                         key.offset = (u64)-1;
8734                         continue;
8735                 }
8736
8737                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8738                         found_key.objectid, found_key.type, found_key.offset);
8739
8740                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8741                 if (ret)
8742                         break;
8743                 btrfs_release_path(path);
8744
8745                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8746                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8747                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8748                                 found_key.offset : root->fs_info->nodesize;
8749
8750                         ret = btrfs_update_block_group(trans, root, bytenr,
8751                                                        bytes, 0, 0);
8752                         if (ret)
8753                                 break;
8754                 }
8755         }
8756
8757         btrfs_release_path(path);
8758         return ret;
8759 }
8760
8761 /*
8762  * for a single backref, this will allocate a new extent
8763  * and add the backref to it.
8764  */
8765 static int record_extent(struct btrfs_trans_handle *trans,
8766                          struct btrfs_fs_info *info,
8767                          struct btrfs_path *path,
8768                          struct extent_record *rec,
8769                          struct extent_backref *back,
8770                          int allocated, u64 flags)
8771 {
8772         int ret = 0;
8773         struct btrfs_root *extent_root = info->extent_root;
8774         struct extent_buffer *leaf;
8775         struct btrfs_key ins_key;
8776         struct btrfs_extent_item *ei;
8777         struct data_backref *dback;
8778         struct btrfs_tree_block_info *bi;
8779
8780         if (!back->is_data)
8781                 rec->max_size = max_t(u64, rec->max_size,
8782                                     info->nodesize);
8783
8784         if (!allocated) {
8785                 u32 item_size = sizeof(*ei);
8786
8787                 if (!back->is_data)
8788                         item_size += sizeof(*bi);
8789
8790                 ins_key.objectid = rec->start;
8791                 ins_key.offset = rec->max_size;
8792                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8793
8794                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8795                                         &ins_key, item_size);
8796                 if (ret)
8797                         goto fail;
8798
8799                 leaf = path->nodes[0];
8800                 ei = btrfs_item_ptr(leaf, path->slots[0],
8801                                     struct btrfs_extent_item);
8802
8803                 btrfs_set_extent_refs(leaf, ei, 0);
8804                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8805
8806                 if (back->is_data) {
8807                         btrfs_set_extent_flags(leaf, ei,
8808                                                BTRFS_EXTENT_FLAG_DATA);
8809                 } else {
8810                         struct btrfs_disk_key copy_key;;
8811
8812                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8813                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8814                                              sizeof(*bi));
8815
8816                         btrfs_set_disk_key_objectid(&copy_key,
8817                                                     rec->info_objectid);
8818                         btrfs_set_disk_key_type(&copy_key, 0);
8819                         btrfs_set_disk_key_offset(&copy_key, 0);
8820
8821                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8822                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8823
8824                         btrfs_set_extent_flags(leaf, ei,
8825                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8826                 }
8827
8828                 btrfs_mark_buffer_dirty(leaf);
8829                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8830                                                rec->max_size, 1, 0);
8831                 if (ret)
8832                         goto fail;
8833                 btrfs_release_path(path);
8834         }
8835
8836         if (back->is_data) {
8837                 u64 parent;
8838                 int i;
8839
8840                 dback = to_data_backref(back);
8841                 if (back->full_backref)
8842                         parent = dback->parent;
8843                 else
8844                         parent = 0;
8845
8846                 for (i = 0; i < dback->found_ref; i++) {
8847                         /* if parent != 0, we're doing a full backref
8848                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8849                          * just makes the backref allocator create a data
8850                          * backref
8851                          */
8852                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8853                                                    rec->start, rec->max_size,
8854                                                    parent,
8855                                                    dback->root,
8856                                                    parent ?
8857                                                    BTRFS_FIRST_FREE_OBJECTID :
8858                                                    dback->owner,
8859                                                    dback->offset);
8860                         if (ret)
8861                                 break;
8862                 }
8863                 fprintf(stderr, "adding new data backref"
8864                                 " on %llu %s %llu owner %llu"
8865                                 " offset %llu found %d\n",
8866                                 (unsigned long long)rec->start,
8867                                 back->full_backref ?
8868                                 "parent" : "root",
8869                                 back->full_backref ?
8870                                 (unsigned long long)parent :
8871                                 (unsigned long long)dback->root,
8872                                 (unsigned long long)dback->owner,
8873                                 (unsigned long long)dback->offset,
8874                                 dback->found_ref);
8875         } else {
8876                 u64 parent;
8877                 struct tree_backref *tback;
8878
8879                 tback = to_tree_backref(back);
8880                 if (back->full_backref)
8881                         parent = tback->parent;
8882                 else
8883                         parent = 0;
8884
8885                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8886                                            rec->start, rec->max_size,
8887                                            parent, tback->root, 0, 0);
8888                 fprintf(stderr, "adding new tree backref on "
8889                         "start %llu len %llu parent %llu root %llu\n",
8890                         rec->start, rec->max_size, parent, tback->root);
8891         }
8892 fail:
8893         btrfs_release_path(path);
8894         return ret;
8895 }
8896
8897 static struct extent_entry *find_entry(struct list_head *entries,
8898                                        u64 bytenr, u64 bytes)
8899 {
8900         struct extent_entry *entry = NULL;
8901
8902         list_for_each_entry(entry, entries, list) {
8903                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8904                         return entry;
8905         }
8906
8907         return NULL;
8908 }
8909
8910 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8911 {
8912         struct extent_entry *entry, *best = NULL, *prev = NULL;
8913
8914         list_for_each_entry(entry, entries, list) {
8915                 /*
8916                  * If there are as many broken entries as entries then we know
8917                  * not to trust this particular entry.
8918                  */
8919                 if (entry->broken == entry->count)
8920                         continue;
8921
8922                 /*
8923                  * Special case, when there are only two entries and 'best' is
8924                  * the first one
8925                  */
8926                 if (!prev) {
8927                         best = entry;
8928                         prev = entry;
8929                         continue;
8930                 }
8931
8932                 /*
8933                  * If our current entry == best then we can't be sure our best
8934                  * is really the best, so we need to keep searching.
8935                  */
8936                 if (best && best->count == entry->count) {
8937                         prev = entry;
8938                         best = NULL;
8939                         continue;
8940                 }
8941
8942                 /* Prev == entry, not good enough, have to keep searching */
8943                 if (!prev->broken && prev->count == entry->count)
8944                         continue;
8945
8946                 if (!best)
8947                         best = (prev->count > entry->count) ? prev : entry;
8948                 else if (best->count < entry->count)
8949                         best = entry;
8950                 prev = entry;
8951         }
8952
8953         return best;
8954 }
8955
8956 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8957                       struct data_backref *dback, struct extent_entry *entry)
8958 {
8959         struct btrfs_trans_handle *trans;
8960         struct btrfs_root *root;
8961         struct btrfs_file_extent_item *fi;
8962         struct extent_buffer *leaf;
8963         struct btrfs_key key;
8964         u64 bytenr, bytes;
8965         int ret, err;
8966
8967         key.objectid = dback->root;
8968         key.type = BTRFS_ROOT_ITEM_KEY;
8969         key.offset = (u64)-1;
8970         root = btrfs_read_fs_root(info, &key);
8971         if (IS_ERR(root)) {
8972                 fprintf(stderr, "Couldn't find root for our ref\n");
8973                 return -EINVAL;
8974         }
8975
8976         /*
8977          * The backref points to the original offset of the extent if it was
8978          * split, so we need to search down to the offset we have and then walk
8979          * forward until we find the backref we're looking for.
8980          */
8981         key.objectid = dback->owner;
8982         key.type = BTRFS_EXTENT_DATA_KEY;
8983         key.offset = dback->offset;
8984         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8985         if (ret < 0) {
8986                 fprintf(stderr, "Error looking up ref %d\n", ret);
8987                 return ret;
8988         }
8989
8990         while (1) {
8991                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8992                         ret = btrfs_next_leaf(root, path);
8993                         if (ret) {
8994                                 fprintf(stderr, "Couldn't find our ref, next\n");
8995                                 return -EINVAL;
8996                         }
8997                 }
8998                 leaf = path->nodes[0];
8999                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9000                 if (key.objectid != dback->owner ||
9001                     key.type != BTRFS_EXTENT_DATA_KEY) {
9002                         fprintf(stderr, "Couldn't find our ref, search\n");
9003                         return -EINVAL;
9004                 }
9005                 fi = btrfs_item_ptr(leaf, path->slots[0],
9006                                     struct btrfs_file_extent_item);
9007                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9008                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9009
9010                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9011                         break;
9012                 path->slots[0]++;
9013         }
9014
9015         btrfs_release_path(path);
9016
9017         trans = btrfs_start_transaction(root, 1);
9018         if (IS_ERR(trans))
9019                 return PTR_ERR(trans);
9020
9021         /*
9022          * Ok we have the key of the file extent we want to fix, now we can cow
9023          * down to the thing and fix it.
9024          */
9025         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9026         if (ret < 0) {
9027                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9028                         key.objectid, key.type, key.offset, ret);
9029                 goto out;
9030         }
9031         if (ret > 0) {
9032                 fprintf(stderr, "Well that's odd, we just found this key "
9033                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9034                         key.offset);
9035                 ret = -EINVAL;
9036                 goto out;
9037         }
9038         leaf = path->nodes[0];
9039         fi = btrfs_item_ptr(leaf, path->slots[0],
9040                             struct btrfs_file_extent_item);
9041
9042         if (btrfs_file_extent_compression(leaf, fi) &&
9043             dback->disk_bytenr != entry->bytenr) {
9044                 fprintf(stderr, "Ref doesn't match the record start and is "
9045                         "compressed, please take a btrfs-image of this file "
9046                         "system and send it to a btrfs developer so they can "
9047                         "complete this functionality for bytenr %Lu\n",
9048                         dback->disk_bytenr);
9049                 ret = -EINVAL;
9050                 goto out;
9051         }
9052
9053         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9054                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9055         } else if (dback->disk_bytenr > entry->bytenr) {
9056                 u64 off_diff, offset;
9057
9058                 off_diff = dback->disk_bytenr - entry->bytenr;
9059                 offset = btrfs_file_extent_offset(leaf, fi);
9060                 if (dback->disk_bytenr + offset +
9061                     btrfs_file_extent_num_bytes(leaf, fi) >
9062                     entry->bytenr + entry->bytes) {
9063                         fprintf(stderr, "Ref is past the entry end, please "
9064                                 "take a btrfs-image of this file system and "
9065                                 "send it to a btrfs developer, ref %Lu\n",
9066                                 dback->disk_bytenr);
9067                         ret = -EINVAL;
9068                         goto out;
9069                 }
9070                 offset += off_diff;
9071                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9072                 btrfs_set_file_extent_offset(leaf, fi, offset);
9073         } else if (dback->disk_bytenr < entry->bytenr) {
9074                 u64 offset;
9075
9076                 offset = btrfs_file_extent_offset(leaf, fi);
9077                 if (dback->disk_bytenr + offset < entry->bytenr) {
9078                         fprintf(stderr, "Ref is before the entry start, please"
9079                                 " take a btrfs-image of this file system and "
9080                                 "send it to a btrfs developer, ref %Lu\n",
9081                                 dback->disk_bytenr);
9082                         ret = -EINVAL;
9083                         goto out;
9084                 }
9085
9086                 offset += dback->disk_bytenr;
9087                 offset -= entry->bytenr;
9088                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9089                 btrfs_set_file_extent_offset(leaf, fi, offset);
9090         }
9091
9092         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9093
9094         /*
9095          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9096          * only do this if we aren't using compression, otherwise it's a
9097          * trickier case.
9098          */
9099         if (!btrfs_file_extent_compression(leaf, fi))
9100                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9101         else
9102                 printf("ram bytes may be wrong?\n");
9103         btrfs_mark_buffer_dirty(leaf);
9104 out:
9105         err = btrfs_commit_transaction(trans, root);
9106         btrfs_release_path(path);
9107         return ret ? ret : err;
9108 }
9109
9110 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9111                            struct extent_record *rec)
9112 {
9113         struct extent_backref *back, *tmp;
9114         struct data_backref *dback;
9115         struct extent_entry *entry, *best = NULL;
9116         LIST_HEAD(entries);
9117         int nr_entries = 0;
9118         int broken_entries = 0;
9119         int ret = 0;
9120         short mismatch = 0;
9121
9122         /*
9123          * Metadata is easy and the backrefs should always agree on bytenr and
9124          * size, if not we've got bigger issues.
9125          */
9126         if (rec->metadata)
9127                 return 0;
9128
9129         rbtree_postorder_for_each_entry_safe(back, tmp,
9130                                              &rec->backref_tree, node) {
9131                 if (back->full_backref || !back->is_data)
9132                         continue;
9133
9134                 dback = to_data_backref(back);
9135
9136                 /*
9137                  * We only pay attention to backrefs that we found a real
9138                  * backref for.
9139                  */
9140                 if (dback->found_ref == 0)
9141                         continue;
9142
9143                 /*
9144                  * For now we only catch when the bytes don't match, not the
9145                  * bytenr.  We can easily do this at the same time, but I want
9146                  * to have a fs image to test on before we just add repair
9147                  * functionality willy-nilly so we know we won't screw up the
9148                  * repair.
9149                  */
9150
9151                 entry = find_entry(&entries, dback->disk_bytenr,
9152                                    dback->bytes);
9153                 if (!entry) {
9154                         entry = malloc(sizeof(struct extent_entry));
9155                         if (!entry) {
9156                                 ret = -ENOMEM;
9157                                 goto out;
9158                         }
9159                         memset(entry, 0, sizeof(*entry));
9160                         entry->bytenr = dback->disk_bytenr;
9161                         entry->bytes = dback->bytes;
9162                         list_add_tail(&entry->list, &entries);
9163                         nr_entries++;
9164                 }
9165
9166                 /*
9167                  * If we only have on entry we may think the entries agree when
9168                  * in reality they don't so we have to do some extra checking.
9169                  */
9170                 if (dback->disk_bytenr != rec->start ||
9171                     dback->bytes != rec->nr || back->broken)
9172                         mismatch = 1;
9173
9174                 if (back->broken) {
9175                         entry->broken++;
9176                         broken_entries++;
9177                 }
9178
9179                 entry->count++;
9180         }
9181
9182         /* Yay all the backrefs agree, carry on good sir */
9183         if (nr_entries <= 1 && !mismatch)
9184                 goto out;
9185
9186         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9187                 "%Lu\n", rec->start);
9188
9189         /*
9190          * First we want to see if the backrefs can agree amongst themselves who
9191          * is right, so figure out which one of the entries has the highest
9192          * count.
9193          */
9194         best = find_most_right_entry(&entries);
9195
9196         /*
9197          * Ok so we may have an even split between what the backrefs think, so
9198          * this is where we use the extent ref to see what it thinks.
9199          */
9200         if (!best) {
9201                 entry = find_entry(&entries, rec->start, rec->nr);
9202                 if (!entry && (!broken_entries || !rec->found_rec)) {
9203                         fprintf(stderr, "Backrefs don't agree with each other "
9204                                 "and extent record doesn't agree with anybody,"
9205                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9206                                 rec->start, rec->nr);
9207                         ret = -EINVAL;
9208                         goto out;
9209                 } else if (!entry) {
9210                         /*
9211                          * Ok our backrefs were broken, we'll assume this is the
9212                          * correct value and add an entry for this range.
9213                          */
9214                         entry = malloc(sizeof(struct extent_entry));
9215                         if (!entry) {
9216                                 ret = -ENOMEM;
9217                                 goto out;
9218                         }
9219                         memset(entry, 0, sizeof(*entry));
9220                         entry->bytenr = rec->start;
9221                         entry->bytes = rec->nr;
9222                         list_add_tail(&entry->list, &entries);
9223                         nr_entries++;
9224                 }
9225                 entry->count++;
9226                 best = find_most_right_entry(&entries);
9227                 if (!best) {
9228                         fprintf(stderr, "Backrefs and extent record evenly "
9229                                 "split on who is right, this is going to "
9230                                 "require user input to fix bytenr %Lu bytes "
9231                                 "%Lu\n", rec->start, rec->nr);
9232                         ret = -EINVAL;
9233                         goto out;
9234                 }
9235         }
9236
9237         /*
9238          * I don't think this can happen currently as we'll abort() if we catch
9239          * this case higher up, but in case somebody removes that we still can't
9240          * deal with it properly here yet, so just bail out of that's the case.
9241          */
9242         if (best->bytenr != rec->start) {
9243                 fprintf(stderr, "Extent start and backref starts don't match, "
9244                         "please use btrfs-image on this file system and send "
9245                         "it to a btrfs developer so they can make fsck fix "
9246                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9247                         rec->start, rec->nr);
9248                 ret = -EINVAL;
9249                 goto out;
9250         }
9251
9252         /*
9253          * Ok great we all agreed on an extent record, let's go find the real
9254          * references and fix up the ones that don't match.
9255          */
9256         rbtree_postorder_for_each_entry_safe(back, tmp,
9257                                              &rec->backref_tree, node) {
9258                 if (back->full_backref || !back->is_data)
9259                         continue;
9260
9261                 dback = to_data_backref(back);
9262
9263                 /*
9264                  * Still ignoring backrefs that don't have a real ref attached
9265                  * to them.
9266                  */
9267                 if (dback->found_ref == 0)
9268                         continue;
9269
9270                 if (dback->bytes == best->bytes &&
9271                     dback->disk_bytenr == best->bytenr)
9272                         continue;
9273
9274                 ret = repair_ref(info, path, dback, best);
9275                 if (ret)
9276                         goto out;
9277         }
9278
9279         /*
9280          * Ok we messed with the actual refs, which means we need to drop our
9281          * entire cache and go back and rescan.  I know this is a huge pain and
9282          * adds a lot of extra work, but it's the only way to be safe.  Once all
9283          * the backrefs agree we may not need to do anything to the extent
9284          * record itself.
9285          */
9286         ret = -EAGAIN;
9287 out:
9288         while (!list_empty(&entries)) {
9289                 entry = list_entry(entries.next, struct extent_entry, list);
9290                 list_del_init(&entry->list);
9291                 free(entry);
9292         }
9293         return ret;
9294 }
9295
9296 static int process_duplicates(struct cache_tree *extent_cache,
9297                               struct extent_record *rec)
9298 {
9299         struct extent_record *good, *tmp;
9300         struct cache_extent *cache;
9301         int ret;
9302
9303         /*
9304          * If we found a extent record for this extent then return, or if we
9305          * have more than one duplicate we are likely going to need to delete
9306          * something.
9307          */
9308         if (rec->found_rec || rec->num_duplicates > 1)
9309                 return 0;
9310
9311         /* Shouldn't happen but just in case */
9312         BUG_ON(!rec->num_duplicates);
9313
9314         /*
9315          * So this happens if we end up with a backref that doesn't match the
9316          * actual extent entry.  So either the backref is bad or the extent
9317          * entry is bad.  Either way we want to have the extent_record actually
9318          * reflect what we found in the extent_tree, so we need to take the
9319          * duplicate out and use that as the extent_record since the only way we
9320          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9321          */
9322         remove_cache_extent(extent_cache, &rec->cache);
9323
9324         good = to_extent_record(rec->dups.next);
9325         list_del_init(&good->list);
9326         INIT_LIST_HEAD(&good->backrefs);
9327         INIT_LIST_HEAD(&good->dups);
9328         good->cache.start = good->start;
9329         good->cache.size = good->nr;
9330         good->content_checked = 0;
9331         good->owner_ref_checked = 0;
9332         good->num_duplicates = 0;
9333         good->refs = rec->refs;
9334         list_splice_init(&rec->backrefs, &good->backrefs);
9335         while (1) {
9336                 cache = lookup_cache_extent(extent_cache, good->start,
9337                                             good->nr);
9338                 if (!cache)
9339                         break;
9340                 tmp = container_of(cache, struct extent_record, cache);
9341
9342                 /*
9343                  * If we find another overlapping extent and it's found_rec is
9344                  * set then it's a duplicate and we need to try and delete
9345                  * something.
9346                  */
9347                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9348                         if (list_empty(&good->list))
9349                                 list_add_tail(&good->list,
9350                                               &duplicate_extents);
9351                         good->num_duplicates += tmp->num_duplicates + 1;
9352                         list_splice_init(&tmp->dups, &good->dups);
9353                         list_del_init(&tmp->list);
9354                         list_add_tail(&tmp->list, &good->dups);
9355                         remove_cache_extent(extent_cache, &tmp->cache);
9356                         continue;
9357                 }
9358
9359                 /*
9360                  * Ok we have another non extent item backed extent rec, so lets
9361                  * just add it to this extent and carry on like we did above.
9362                  */
9363                 good->refs += tmp->refs;
9364                 list_splice_init(&tmp->backrefs, &good->backrefs);
9365                 remove_cache_extent(extent_cache, &tmp->cache);
9366                 free(tmp);
9367         }
9368         ret = insert_cache_extent(extent_cache, &good->cache);
9369         BUG_ON(ret);
9370         free(rec);
9371         return good->num_duplicates ? 0 : 1;
9372 }
9373
9374 static int delete_duplicate_records(struct btrfs_root *root,
9375                                     struct extent_record *rec)
9376 {
9377         struct btrfs_trans_handle *trans;
9378         LIST_HEAD(delete_list);
9379         struct btrfs_path path;
9380         struct extent_record *tmp, *good, *n;
9381         int nr_del = 0;
9382         int ret = 0, err;
9383         struct btrfs_key key;
9384
9385         btrfs_init_path(&path);
9386
9387         good = rec;
9388         /* Find the record that covers all of the duplicates. */
9389         list_for_each_entry(tmp, &rec->dups, list) {
9390                 if (good->start < tmp->start)
9391                         continue;
9392                 if (good->nr > tmp->nr)
9393                         continue;
9394
9395                 if (tmp->start + tmp->nr < good->start + good->nr) {
9396                         fprintf(stderr, "Ok we have overlapping extents that "
9397                                 "aren't completely covered by each other, this "
9398                                 "is going to require more careful thought.  "
9399                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9400                                 tmp->start, tmp->nr, good->start, good->nr);
9401                         abort();
9402                 }
9403                 good = tmp;
9404         }
9405
9406         if (good != rec)
9407                 list_add_tail(&rec->list, &delete_list);
9408
9409         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9410                 if (tmp == good)
9411                         continue;
9412                 list_move_tail(&tmp->list, &delete_list);
9413         }
9414
9415         root = root->fs_info->extent_root;
9416         trans = btrfs_start_transaction(root, 1);
9417         if (IS_ERR(trans)) {
9418                 ret = PTR_ERR(trans);
9419                 goto out;
9420         }
9421
9422         list_for_each_entry(tmp, &delete_list, list) {
9423                 if (tmp->found_rec == 0)
9424                         continue;
9425                 key.objectid = tmp->start;
9426                 key.type = BTRFS_EXTENT_ITEM_KEY;
9427                 key.offset = tmp->nr;
9428
9429                 /* Shouldn't happen but just in case */
9430                 if (tmp->metadata) {
9431                         fprintf(stderr, "Well this shouldn't happen, extent "
9432                                 "record overlaps but is metadata? "
9433                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9434                         abort();
9435                 }
9436
9437                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9438                 if (ret) {
9439                         if (ret > 0)
9440                                 ret = -EINVAL;
9441                         break;
9442                 }
9443                 ret = btrfs_del_item(trans, root, &path);
9444                 if (ret)
9445                         break;
9446                 btrfs_release_path(&path);
9447                 nr_del++;
9448         }
9449         err = btrfs_commit_transaction(trans, root);
9450         if (err && !ret)
9451                 ret = err;
9452 out:
9453         while (!list_empty(&delete_list)) {
9454                 tmp = to_extent_record(delete_list.next);
9455                 list_del_init(&tmp->list);
9456                 if (tmp == rec)
9457                         continue;
9458                 free(tmp);
9459         }
9460
9461         while (!list_empty(&rec->dups)) {
9462                 tmp = to_extent_record(rec->dups.next);
9463                 list_del_init(&tmp->list);
9464                 free(tmp);
9465         }
9466
9467         btrfs_release_path(&path);
9468
9469         if (!ret && !nr_del)
9470                 rec->num_duplicates = 0;
9471
9472         return ret ? ret : nr_del;
9473 }
9474
9475 static int find_possible_backrefs(struct btrfs_fs_info *info,
9476                                   struct btrfs_path *path,
9477                                   struct cache_tree *extent_cache,
9478                                   struct extent_record *rec)
9479 {
9480         struct btrfs_root *root;
9481         struct extent_backref *back, *tmp;
9482         struct data_backref *dback;
9483         struct cache_extent *cache;
9484         struct btrfs_file_extent_item *fi;
9485         struct btrfs_key key;
9486         u64 bytenr, bytes;
9487         int ret;
9488
9489         rbtree_postorder_for_each_entry_safe(back, tmp,
9490                                              &rec->backref_tree, node) {
9491                 /* Don't care about full backrefs (poor unloved backrefs) */
9492                 if (back->full_backref || !back->is_data)
9493                         continue;
9494
9495                 dback = to_data_backref(back);
9496
9497                 /* We found this one, we don't need to do a lookup */
9498                 if (dback->found_ref)
9499                         continue;
9500
9501                 key.objectid = dback->root;
9502                 key.type = BTRFS_ROOT_ITEM_KEY;
9503                 key.offset = (u64)-1;
9504
9505                 root = btrfs_read_fs_root(info, &key);
9506
9507                 /* No root, definitely a bad ref, skip */
9508                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9509                         continue;
9510                 /* Other err, exit */
9511                 if (IS_ERR(root))
9512                         return PTR_ERR(root);
9513
9514                 key.objectid = dback->owner;
9515                 key.type = BTRFS_EXTENT_DATA_KEY;
9516                 key.offset = dback->offset;
9517                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9518                 if (ret) {
9519                         btrfs_release_path(path);
9520                         if (ret < 0)
9521                                 return ret;
9522                         /* Didn't find it, we can carry on */
9523                         ret = 0;
9524                         continue;
9525                 }
9526
9527                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9528                                     struct btrfs_file_extent_item);
9529                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9530                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9531                 btrfs_release_path(path);
9532                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9533                 if (cache) {
9534                         struct extent_record *tmp;
9535                         tmp = container_of(cache, struct extent_record, cache);
9536
9537                         /*
9538                          * If we found an extent record for the bytenr for this
9539                          * particular backref then we can't add it to our
9540                          * current extent record.  We only want to add backrefs
9541                          * that don't have a corresponding extent item in the
9542                          * extent tree since they likely belong to this record
9543                          * and we need to fix it if it doesn't match bytenrs.
9544                          */
9545                         if  (tmp->found_rec)
9546                                 continue;
9547                 }
9548
9549                 dback->found_ref += 1;
9550                 dback->disk_bytenr = bytenr;
9551                 dback->bytes = bytes;
9552
9553                 /*
9554                  * Set this so the verify backref code knows not to trust the
9555                  * values in this backref.
9556                  */
9557                 back->broken = 1;
9558         }
9559
9560         return 0;
9561 }
9562
9563 /*
9564  * Record orphan data ref into corresponding root.
9565  *
9566  * Return 0 if the extent item contains data ref and recorded.
9567  * Return 1 if the extent item contains no useful data ref
9568  *   On that case, it may contains only shared_dataref or metadata backref
9569  *   or the file extent exists(this should be handled by the extent bytenr
9570  *   recovery routine)
9571  * Return <0 if something goes wrong.
9572  */
9573 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9574                                       struct extent_record *rec)
9575 {
9576         struct btrfs_key key;
9577         struct btrfs_root *dest_root;
9578         struct extent_backref *back, *tmp;
9579         struct data_backref *dback;
9580         struct orphan_data_extent *orphan;
9581         struct btrfs_path path;
9582         int recorded_data_ref = 0;
9583         int ret = 0;
9584
9585         if (rec->metadata)
9586                 return 1;
9587         btrfs_init_path(&path);
9588         rbtree_postorder_for_each_entry_safe(back, tmp,
9589                                              &rec->backref_tree, node) {
9590                 if (back->full_backref || !back->is_data ||
9591                     !back->found_extent_tree)
9592                         continue;
9593                 dback = to_data_backref(back);
9594                 if (dback->found_ref)
9595                         continue;
9596                 key.objectid = dback->root;
9597                 key.type = BTRFS_ROOT_ITEM_KEY;
9598                 key.offset = (u64)-1;
9599
9600                 dest_root = btrfs_read_fs_root(fs_info, &key);
9601
9602                 /* For non-exist root we just skip it */
9603                 if (IS_ERR(dest_root) || !dest_root)
9604                         continue;
9605
9606                 key.objectid = dback->owner;
9607                 key.type = BTRFS_EXTENT_DATA_KEY;
9608                 key.offset = dback->offset;
9609
9610                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9611                 btrfs_release_path(&path);
9612                 /*
9613                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9614                  * we need to record it for inode/file extent rebuild.
9615                  * For ret > 0, we record it only for file extent rebuild.
9616                  * For ret == 0, the file extent exists but only bytenr
9617                  * mismatch, let the original bytenr fix routine to handle,
9618                  * don't record it.
9619                  */
9620                 if (ret == 0)
9621                         continue;
9622                 ret = 0;
9623                 orphan = malloc(sizeof(*orphan));
9624                 if (!orphan) {
9625                         ret = -ENOMEM;
9626                         goto out;
9627                 }
9628                 INIT_LIST_HEAD(&orphan->list);
9629                 orphan->root = dback->root;
9630                 orphan->objectid = dback->owner;
9631                 orphan->offset = dback->offset;
9632                 orphan->disk_bytenr = rec->cache.start;
9633                 orphan->disk_len = rec->cache.size;
9634                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9635                 recorded_data_ref = 1;
9636         }
9637 out:
9638         btrfs_release_path(&path);
9639         if (!ret)
9640                 return !recorded_data_ref;
9641         else
9642                 return ret;
9643 }
9644
9645 /*
9646  * when an incorrect extent item is found, this will delete
9647  * all of the existing entries for it and recreate them
9648  * based on what the tree scan found.
9649  */
9650 static int fixup_extent_refs(struct btrfs_fs_info *info,
9651                              struct cache_tree *extent_cache,
9652                              struct extent_record *rec)
9653 {
9654         struct btrfs_trans_handle *trans = NULL;
9655         int ret;
9656         struct btrfs_path path;
9657         struct cache_extent *cache;
9658         struct extent_backref *back, *tmp;
9659         int allocated = 0;
9660         u64 flags = 0;
9661
9662         if (rec->flag_block_full_backref)
9663                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9664
9665         btrfs_init_path(&path);
9666         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9667                 /*
9668                  * Sometimes the backrefs themselves are so broken they don't
9669                  * get attached to any meaningful rec, so first go back and
9670                  * check any of our backrefs that we couldn't find and throw
9671                  * them into the list if we find the backref so that
9672                  * verify_backrefs can figure out what to do.
9673                  */
9674                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9675                 if (ret < 0)
9676                         goto out;
9677         }
9678
9679         /* step one, make sure all of the backrefs agree */
9680         ret = verify_backrefs(info, &path, rec);
9681         if (ret < 0)
9682                 goto out;
9683
9684         trans = btrfs_start_transaction(info->extent_root, 1);
9685         if (IS_ERR(trans)) {
9686                 ret = PTR_ERR(trans);
9687                 goto out;
9688         }
9689
9690         /* step two, delete all the existing records */
9691         ret = delete_extent_records(trans, info->extent_root, &path,
9692                                     rec->start);
9693
9694         if (ret < 0)
9695                 goto out;
9696
9697         /* was this block corrupt?  If so, don't add references to it */
9698         cache = lookup_cache_extent(info->corrupt_blocks,
9699                                     rec->start, rec->max_size);
9700         if (cache) {
9701                 ret = 0;
9702                 goto out;
9703         }
9704
9705         /* step three, recreate all the refs we did find */
9706         rbtree_postorder_for_each_entry_safe(back, tmp,
9707                                              &rec->backref_tree, node) {
9708                 /*
9709                  * if we didn't find any references, don't create a
9710                  * new extent record
9711                  */
9712                 if (!back->found_ref)
9713                         continue;
9714
9715                 rec->bad_full_backref = 0;
9716                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9717                 allocated = 1;
9718
9719                 if (ret)
9720                         goto out;
9721         }
9722 out:
9723         if (trans) {
9724                 int err = btrfs_commit_transaction(trans, info->extent_root);
9725                 if (!ret)
9726                         ret = err;
9727         }
9728
9729         if (!ret)
9730                 fprintf(stderr, "Repaired extent references for %llu\n",
9731                                 (unsigned long long)rec->start);
9732
9733         btrfs_release_path(&path);
9734         return ret;
9735 }
9736
9737 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9738                               struct extent_record *rec)
9739 {
9740         struct btrfs_trans_handle *trans;
9741         struct btrfs_root *root = fs_info->extent_root;
9742         struct btrfs_path path;
9743         struct btrfs_extent_item *ei;
9744         struct btrfs_key key;
9745         u64 flags;
9746         int ret = 0;
9747
9748         key.objectid = rec->start;
9749         if (rec->metadata) {
9750                 key.type = BTRFS_METADATA_ITEM_KEY;
9751                 key.offset = rec->info_level;
9752         } else {
9753                 key.type = BTRFS_EXTENT_ITEM_KEY;
9754                 key.offset = rec->max_size;
9755         }
9756
9757         trans = btrfs_start_transaction(root, 0);
9758         if (IS_ERR(trans))
9759                 return PTR_ERR(trans);
9760
9761         btrfs_init_path(&path);
9762         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9763         if (ret < 0) {
9764                 btrfs_release_path(&path);
9765                 btrfs_commit_transaction(trans, root);
9766                 return ret;
9767         } else if (ret) {
9768                 fprintf(stderr, "Didn't find extent for %llu\n",
9769                         (unsigned long long)rec->start);
9770                 btrfs_release_path(&path);
9771                 btrfs_commit_transaction(trans, root);
9772                 return -ENOENT;
9773         }
9774
9775         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9776                             struct btrfs_extent_item);
9777         flags = btrfs_extent_flags(path.nodes[0], ei);
9778         if (rec->flag_block_full_backref) {
9779                 fprintf(stderr, "setting full backref on %llu\n",
9780                         (unsigned long long)key.objectid);
9781                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9782         } else {
9783                 fprintf(stderr, "clearing full backref on %llu\n",
9784                         (unsigned long long)key.objectid);
9785                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9786         }
9787         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9788         btrfs_mark_buffer_dirty(path.nodes[0]);
9789         btrfs_release_path(&path);
9790         ret = btrfs_commit_transaction(trans, root);
9791         if (!ret)
9792                 fprintf(stderr, "Repaired extent flags for %llu\n",
9793                                 (unsigned long long)rec->start);
9794
9795         return ret;
9796 }
9797
9798 /* right now we only prune from the extent allocation tree */
9799 static int prune_one_block(struct btrfs_trans_handle *trans,
9800                            struct btrfs_fs_info *info,
9801                            struct btrfs_corrupt_block *corrupt)
9802 {
9803         int ret;
9804         struct btrfs_path path;
9805         struct extent_buffer *eb;
9806         u64 found;
9807         int slot;
9808         int nritems;
9809         int level = corrupt->level + 1;
9810
9811         btrfs_init_path(&path);
9812 again:
9813         /* we want to stop at the parent to our busted block */
9814         path.lowest_level = level;
9815
9816         ret = btrfs_search_slot(trans, info->extent_root,
9817                                 &corrupt->key, &path, -1, 1);
9818
9819         if (ret < 0)
9820                 goto out;
9821
9822         eb = path.nodes[level];
9823         if (!eb) {
9824                 ret = -ENOENT;
9825                 goto out;
9826         }
9827
9828         /*
9829          * hopefully the search gave us the block we want to prune,
9830          * lets try that first
9831          */
9832         slot = path.slots[level];
9833         found =  btrfs_node_blockptr(eb, slot);
9834         if (found == corrupt->cache.start)
9835                 goto del_ptr;
9836
9837         nritems = btrfs_header_nritems(eb);
9838
9839         /* the search failed, lets scan this node and hope we find it */
9840         for (slot = 0; slot < nritems; slot++) {
9841                 found =  btrfs_node_blockptr(eb, slot);
9842                 if (found == corrupt->cache.start)
9843                         goto del_ptr;
9844         }
9845         /*
9846          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9847          * to this block
9848          */
9849         if (eb == info->extent_root->node) {
9850                 ret = -ENOENT;
9851                 goto out;
9852         } else {
9853                 level++;
9854                 btrfs_release_path(&path);
9855                 goto again;
9856         }
9857
9858 del_ptr:
9859         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9860         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9861
9862 out:
9863         btrfs_release_path(&path);
9864         return ret;
9865 }
9866
9867 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9868 {
9869         struct btrfs_trans_handle *trans = NULL;
9870         struct cache_extent *cache;
9871         struct btrfs_corrupt_block *corrupt;
9872
9873         while (1) {
9874                 cache = search_cache_extent(info->corrupt_blocks, 0);
9875                 if (!cache)
9876                         break;
9877                 if (!trans) {
9878                         trans = btrfs_start_transaction(info->extent_root, 1);
9879                         if (IS_ERR(trans))
9880                                 return PTR_ERR(trans);
9881                 }
9882                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9883                 prune_one_block(trans, info, corrupt);
9884                 remove_cache_extent(info->corrupt_blocks, cache);
9885         }
9886         if (trans)
9887                 return btrfs_commit_transaction(trans, info->extent_root);
9888         return 0;
9889 }
9890
9891 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9892 {
9893         struct btrfs_block_group_cache *cache;
9894         u64 start, end;
9895         int ret;
9896
9897         while (1) {
9898                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9899                                             &start, &end, EXTENT_DIRTY);
9900                 if (ret)
9901                         break;
9902                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9903         }
9904
9905         start = 0;
9906         while (1) {
9907                 cache = btrfs_lookup_first_block_group(fs_info, start);
9908                 if (!cache)
9909                         break;
9910                 if (cache->cached)
9911                         cache->cached = 0;
9912                 start = cache->key.objectid + cache->key.offset;
9913         }
9914 }
9915
9916 static int check_extent_refs(struct btrfs_root *root,
9917                              struct cache_tree *extent_cache)
9918 {
9919         struct extent_record *rec;
9920         struct cache_extent *cache;
9921         int ret = 0;
9922         int had_dups = 0;
9923
9924         if (repair) {
9925                 /*
9926                  * if we're doing a repair, we have to make sure
9927                  * we don't allocate from the problem extents.
9928                  * In the worst case, this will be all the
9929                  * extents in the FS
9930                  */
9931                 cache = search_cache_extent(extent_cache, 0);
9932                 while(cache) {
9933                         rec = container_of(cache, struct extent_record, cache);
9934                         set_extent_dirty(root->fs_info->excluded_extents,
9935                                          rec->start,
9936                                          rec->start + rec->max_size - 1);
9937                         cache = next_cache_extent(cache);
9938                 }
9939
9940                 /* pin down all the corrupted blocks too */
9941                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9942                 while(cache) {
9943                         set_extent_dirty(root->fs_info->excluded_extents,
9944                                          cache->start,
9945                                          cache->start + cache->size - 1);
9946                         cache = next_cache_extent(cache);
9947                 }
9948                 prune_corrupt_blocks(root->fs_info);
9949                 reset_cached_block_groups(root->fs_info);
9950         }
9951
9952         reset_cached_block_groups(root->fs_info);
9953
9954         /*
9955          * We need to delete any duplicate entries we find first otherwise we
9956          * could mess up the extent tree when we have backrefs that actually
9957          * belong to a different extent item and not the weird duplicate one.
9958          */
9959         while (repair && !list_empty(&duplicate_extents)) {
9960                 rec = to_extent_record(duplicate_extents.next);
9961                 list_del_init(&rec->list);
9962
9963                 /* Sometimes we can find a backref before we find an actual
9964                  * extent, so we need to process it a little bit to see if there
9965                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9966                  * if this is a backref screwup.  If we need to delete stuff
9967                  * process_duplicates() will return 0, otherwise it will return
9968                  * 1 and we
9969                  */
9970                 if (process_duplicates(extent_cache, rec))
9971                         continue;
9972                 ret = delete_duplicate_records(root, rec);
9973                 if (ret < 0)
9974                         return ret;
9975                 /*
9976                  * delete_duplicate_records will return the number of entries
9977                  * deleted, so if it's greater than 0 then we know we actually
9978                  * did something and we need to remove.
9979                  */
9980                 if (ret)
9981                         had_dups = 1;
9982         }
9983
9984         if (had_dups)
9985                 return -EAGAIN;
9986
9987         while(1) {
9988                 int cur_err = 0;
9989                 int fix = 0;
9990
9991                 cache = search_cache_extent(extent_cache, 0);
9992                 if (!cache)
9993                         break;
9994                 rec = container_of(cache, struct extent_record, cache);
9995                 if (rec->num_duplicates) {
9996                         fprintf(stderr, "extent item %llu has multiple extent "
9997                                 "items\n", (unsigned long long)rec->start);
9998                         cur_err = 1;
9999                 }
10000
10001                 if (rec->refs != rec->extent_item_refs) {
10002                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10003                                 (unsigned long long)rec->start,
10004                                 (unsigned long long)rec->nr);
10005                         fprintf(stderr, "extent item %llu, found %llu\n",
10006                                 (unsigned long long)rec->extent_item_refs,
10007                                 (unsigned long long)rec->refs);
10008                         ret = record_orphan_data_extents(root->fs_info, rec);
10009                         if (ret < 0)
10010                                 goto repair_abort;
10011                         fix = ret;
10012                         cur_err = 1;
10013                 }
10014                 if (all_backpointers_checked(rec, 1)) {
10015                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10016                                 (unsigned long long)rec->start,
10017                                 (unsigned long long)rec->nr);
10018                         fix = 1;
10019                         cur_err = 1;
10020                 }
10021                 if (!rec->owner_ref_checked) {
10022                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10023                                 (unsigned long long)rec->start,
10024                                 (unsigned long long)rec->nr);
10025                         fix = 1;
10026                         cur_err = 1;
10027                 }
10028
10029                 if (repair && fix) {
10030                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10031                         if (ret)
10032                                 goto repair_abort;
10033                 }
10034
10035
10036                 if (rec->bad_full_backref) {
10037                         fprintf(stderr, "bad full backref, on [%llu]\n",
10038                                 (unsigned long long)rec->start);
10039                         if (repair) {
10040                                 ret = fixup_extent_flags(root->fs_info, rec);
10041                                 if (ret)
10042                                         goto repair_abort;
10043                                 fix = 1;
10044                         }
10045                         cur_err = 1;
10046                 }
10047                 /*
10048                  * Although it's not a extent ref's problem, we reuse this
10049                  * routine for error reporting.
10050                  * No repair function yet.
10051                  */
10052                 if (rec->crossing_stripes) {
10053                         fprintf(stderr,
10054                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10055                                 rec->start, rec->start + rec->max_size);
10056                         cur_err = 1;
10057                 }
10058
10059                 if (rec->wrong_chunk_type) {
10060                         fprintf(stderr,
10061                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10062                                 rec->start, rec->start + rec->max_size);
10063                         cur_err = 1;
10064                 }
10065
10066                 remove_cache_extent(extent_cache, cache);
10067                 free_all_extent_backrefs(rec);
10068                 if (!init_extent_tree && repair && (!cur_err || fix))
10069                         clear_extent_dirty(root->fs_info->excluded_extents,
10070                                            rec->start,
10071                                            rec->start + rec->max_size - 1);
10072                 free(rec);
10073         }
10074 repair_abort:
10075         if (repair) {
10076                 if (ret && ret != -EAGAIN) {
10077                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10078                         exit(1);
10079                 } else if (!ret) {
10080                         struct btrfs_trans_handle *trans;
10081
10082                         root = root->fs_info->extent_root;
10083                         trans = btrfs_start_transaction(root, 1);
10084                         if (IS_ERR(trans)) {
10085                                 ret = PTR_ERR(trans);
10086                                 goto repair_abort;
10087                         }
10088
10089                         ret = btrfs_fix_block_accounting(trans, root);
10090                         if (ret)
10091                                 goto repair_abort;
10092                         ret = btrfs_commit_transaction(trans, root);
10093                         if (ret)
10094                                 goto repair_abort;
10095                 }
10096                 return ret;
10097         }
10098         return 0;
10099 }
10100
10101 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10102 {
10103         u64 stripe_size;
10104
10105         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10106                 stripe_size = length;
10107                 stripe_size /= num_stripes;
10108         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10109                 stripe_size = length * 2;
10110                 stripe_size /= num_stripes;
10111         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10112                 stripe_size = length;
10113                 stripe_size /= (num_stripes - 1);
10114         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10115                 stripe_size = length;
10116                 stripe_size /= (num_stripes - 2);
10117         } else {
10118                 stripe_size = length;
10119         }
10120         return stripe_size;
10121 }
10122
10123 /*
10124  * Check the chunk with its block group/dev list ref:
10125  * Return 0 if all refs seems valid.
10126  * Return 1 if part of refs seems valid, need later check for rebuild ref
10127  * like missing block group and needs to search extent tree to rebuild them.
10128  * Return -1 if essential refs are missing and unable to rebuild.
10129  */
10130 static int check_chunk_refs(struct chunk_record *chunk_rec,
10131                             struct block_group_tree *block_group_cache,
10132                             struct device_extent_tree *dev_extent_cache,
10133                             int silent)
10134 {
10135         struct cache_extent *block_group_item;
10136         struct block_group_record *block_group_rec;
10137         struct cache_extent *dev_extent_item;
10138         struct device_extent_record *dev_extent_rec;
10139         u64 devid;
10140         u64 offset;
10141         u64 length;
10142         int metadump_v2 = 0;
10143         int i;
10144         int ret = 0;
10145
10146         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10147                                                chunk_rec->offset,
10148                                                chunk_rec->length);
10149         if (block_group_item) {
10150                 block_group_rec = container_of(block_group_item,
10151                                                struct block_group_record,
10152                                                cache);
10153                 if (chunk_rec->length != block_group_rec->offset ||
10154                     chunk_rec->offset != block_group_rec->objectid ||
10155                     (!metadump_v2 &&
10156                      chunk_rec->type_flags != block_group_rec->flags)) {
10157                         if (!silent)
10158                                 fprintf(stderr,
10159                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10160                                         chunk_rec->objectid,
10161                                         chunk_rec->type,
10162                                         chunk_rec->offset,
10163                                         chunk_rec->length,
10164                                         chunk_rec->offset,
10165                                         chunk_rec->type_flags,
10166                                         block_group_rec->objectid,
10167                                         block_group_rec->type,
10168                                         block_group_rec->offset,
10169                                         block_group_rec->offset,
10170                                         block_group_rec->objectid,
10171                                         block_group_rec->flags);
10172                         ret = -1;
10173                 } else {
10174                         list_del_init(&block_group_rec->list);
10175                         chunk_rec->bg_rec = block_group_rec;
10176                 }
10177         } else {
10178                 if (!silent)
10179                         fprintf(stderr,
10180                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10181                                 chunk_rec->objectid,
10182                                 chunk_rec->type,
10183                                 chunk_rec->offset,
10184                                 chunk_rec->length,
10185                                 chunk_rec->offset,
10186                                 chunk_rec->type_flags);
10187                 ret = 1;
10188         }
10189
10190         if (metadump_v2)
10191                 return ret;
10192
10193         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10194                                     chunk_rec->num_stripes);
10195         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10196                 devid = chunk_rec->stripes[i].devid;
10197                 offset = chunk_rec->stripes[i].offset;
10198                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10199                                                        devid, offset, length);
10200                 if (dev_extent_item) {
10201                         dev_extent_rec = container_of(dev_extent_item,
10202                                                 struct device_extent_record,
10203                                                 cache);
10204                         if (dev_extent_rec->objectid != devid ||
10205                             dev_extent_rec->offset != offset ||
10206                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10207                             dev_extent_rec->length != length) {
10208                                 if (!silent)
10209                                         fprintf(stderr,
10210                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10211                                                 chunk_rec->objectid,
10212                                                 chunk_rec->type,
10213                                                 chunk_rec->offset,
10214                                                 chunk_rec->stripes[i].devid,
10215                                                 chunk_rec->stripes[i].offset,
10216                                                 dev_extent_rec->objectid,
10217                                                 dev_extent_rec->offset,
10218                                                 dev_extent_rec->length);
10219                                 ret = -1;
10220                         } else {
10221                                 list_move(&dev_extent_rec->chunk_list,
10222                                           &chunk_rec->dextents);
10223                         }
10224                 } else {
10225                         if (!silent)
10226                                 fprintf(stderr,
10227                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10228                                         chunk_rec->objectid,
10229                                         chunk_rec->type,
10230                                         chunk_rec->offset,
10231                                         chunk_rec->stripes[i].devid,
10232                                         chunk_rec->stripes[i].offset);
10233                         ret = -1;
10234                 }
10235         }
10236         return ret;
10237 }
10238
10239 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10240 int check_chunks(struct cache_tree *chunk_cache,
10241                  struct block_group_tree *block_group_cache,
10242                  struct device_extent_tree *dev_extent_cache,
10243                  struct list_head *good, struct list_head *bad,
10244                  struct list_head *rebuild, int silent)
10245 {
10246         struct cache_extent *chunk_item;
10247         struct chunk_record *chunk_rec;
10248         struct block_group_record *bg_rec;
10249         struct device_extent_record *dext_rec;
10250         int err;
10251         int ret = 0;
10252
10253         chunk_item = first_cache_extent(chunk_cache);
10254         while (chunk_item) {
10255                 chunk_rec = container_of(chunk_item, struct chunk_record,
10256                                          cache);
10257                 err = check_chunk_refs(chunk_rec, block_group_cache,
10258                                        dev_extent_cache, silent);
10259                 if (err < 0)
10260                         ret = err;
10261                 if (err == 0 && good)
10262                         list_add_tail(&chunk_rec->list, good);
10263                 if (err > 0 && rebuild)
10264                         list_add_tail(&chunk_rec->list, rebuild);
10265                 if (err < 0 && bad)
10266                         list_add_tail(&chunk_rec->list, bad);
10267                 chunk_item = next_cache_extent(chunk_item);
10268         }
10269
10270         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10271                 if (!silent)
10272                         fprintf(stderr,
10273                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10274                                 bg_rec->objectid,
10275                                 bg_rec->offset,
10276                                 bg_rec->flags);
10277                 if (!ret)
10278                         ret = 1;
10279         }
10280
10281         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10282                             chunk_list) {
10283                 if (!silent)
10284                         fprintf(stderr,
10285                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10286                                 dext_rec->objectid,
10287                                 dext_rec->offset,
10288                                 dext_rec->length);
10289                 if (!ret)
10290                         ret = 1;
10291         }
10292         return ret;
10293 }
10294
10295
10296 static int check_device_used(struct device_record *dev_rec,
10297                              struct device_extent_tree *dext_cache)
10298 {
10299         struct cache_extent *cache;
10300         struct device_extent_record *dev_extent_rec;
10301         u64 total_byte = 0;
10302
10303         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10304         while (cache) {
10305                 dev_extent_rec = container_of(cache,
10306                                               struct device_extent_record,
10307                                               cache);
10308                 if (dev_extent_rec->objectid != dev_rec->devid)
10309                         break;
10310
10311                 list_del_init(&dev_extent_rec->device_list);
10312                 total_byte += dev_extent_rec->length;
10313                 cache = next_cache_extent(cache);
10314         }
10315
10316         if (total_byte != dev_rec->byte_used) {
10317                 fprintf(stderr,
10318                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10319                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10320                         dev_rec->type, dev_rec->offset);
10321                 return -1;
10322         } else {
10323                 return 0;
10324         }
10325 }
10326
10327 /* check btrfs_dev_item -> btrfs_dev_extent */
10328 static int check_devices(struct rb_root *dev_cache,
10329                          struct device_extent_tree *dev_extent_cache)
10330 {
10331         struct rb_node *dev_node;
10332         struct device_record *dev_rec;
10333         struct device_extent_record *dext_rec;
10334         int err;
10335         int ret = 0;
10336
10337         dev_node = rb_first(dev_cache);
10338         while (dev_node) {
10339                 dev_rec = container_of(dev_node, struct device_record, node);
10340                 err = check_device_used(dev_rec, dev_extent_cache);
10341                 if (err)
10342                         ret = err;
10343
10344                 dev_node = rb_next(dev_node);
10345         }
10346         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10347                             device_list) {
10348                 fprintf(stderr,
10349                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10350                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10351                 if (!ret)
10352                         ret = 1;
10353         }
10354         return ret;
10355 }
10356
10357 static int add_root_item_to_list(struct list_head *head,
10358                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10359                                   u8 level, u8 drop_level,
10360                                   struct btrfs_key *drop_key)
10361 {
10362
10363         struct root_item_record *ri_rec;
10364         ri_rec = malloc(sizeof(*ri_rec));
10365         if (!ri_rec)
10366                 return -ENOMEM;
10367         ri_rec->bytenr = bytenr;
10368         ri_rec->objectid = objectid;
10369         ri_rec->level = level;
10370         ri_rec->drop_level = drop_level;
10371         ri_rec->last_snapshot = last_snapshot;
10372         if (drop_key)
10373                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10374         list_add_tail(&ri_rec->list, head);
10375
10376         return 0;
10377 }
10378
10379 static void free_root_item_list(struct list_head *list)
10380 {
10381         struct root_item_record *ri_rec;
10382
10383         while (!list_empty(list)) {
10384                 ri_rec = list_first_entry(list, struct root_item_record,
10385                                           list);
10386                 list_del_init(&ri_rec->list);
10387                 free(ri_rec);
10388         }
10389 }
10390
10391 static int deal_root_from_list(struct list_head *list,
10392                                struct btrfs_root *root,
10393                                struct block_info *bits,
10394                                int bits_nr,
10395                                struct cache_tree *pending,
10396                                struct cache_tree *seen,
10397                                struct cache_tree *reada,
10398                                struct cache_tree *nodes,
10399                                struct cache_tree *extent_cache,
10400                                struct cache_tree *chunk_cache,
10401                                struct rb_root *dev_cache,
10402                                struct block_group_tree *block_group_cache,
10403                                struct device_extent_tree *dev_extent_cache)
10404 {
10405         int ret = 0;
10406         u64 last;
10407
10408         while (!list_empty(list)) {
10409                 struct root_item_record *rec;
10410                 struct extent_buffer *buf;
10411                 rec = list_entry(list->next,
10412                                  struct root_item_record, list);
10413                 last = 0;
10414                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10415                 if (!extent_buffer_uptodate(buf)) {
10416                         free_extent_buffer(buf);
10417                         ret = -EIO;
10418                         break;
10419                 }
10420                 ret = add_root_to_pending(buf, extent_cache, pending,
10421                                     seen, nodes, rec->objectid);
10422                 if (ret < 0)
10423                         break;
10424                 /*
10425                  * To rebuild extent tree, we need deal with snapshot
10426                  * one by one, otherwise we deal with node firstly which
10427                  * can maximize readahead.
10428                  */
10429                 while (1) {
10430                         ret = run_next_block(root, bits, bits_nr, &last,
10431                                              pending, seen, reada, nodes,
10432                                              extent_cache, chunk_cache,
10433                                              dev_cache, block_group_cache,
10434                                              dev_extent_cache, rec);
10435                         if (ret != 0)
10436                                 break;
10437                 }
10438                 free_extent_buffer(buf);
10439                 list_del(&rec->list);
10440                 free(rec);
10441                 if (ret < 0)
10442                         break;
10443         }
10444         while (ret >= 0) {
10445                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10446                                      reada, nodes, extent_cache, chunk_cache,
10447                                      dev_cache, block_group_cache,
10448                                      dev_extent_cache, NULL);
10449                 if (ret != 0) {
10450                         if (ret > 0)
10451                                 ret = 0;
10452                         break;
10453                 }
10454         }
10455         return ret;
10456 }
10457
10458 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10459 {
10460         struct rb_root dev_cache;
10461         struct cache_tree chunk_cache;
10462         struct block_group_tree block_group_cache;
10463         struct device_extent_tree dev_extent_cache;
10464         struct cache_tree extent_cache;
10465         struct cache_tree seen;
10466         struct cache_tree pending;
10467         struct cache_tree reada;
10468         struct cache_tree nodes;
10469         struct extent_io_tree excluded_extents;
10470         struct cache_tree corrupt_blocks;
10471         struct btrfs_path path;
10472         struct btrfs_key key;
10473         struct btrfs_key found_key;
10474         int ret, err = 0;
10475         struct block_info *bits;
10476         int bits_nr;
10477         struct extent_buffer *leaf;
10478         int slot;
10479         struct btrfs_root_item ri;
10480         struct list_head dropping_trees;
10481         struct list_head normal_trees;
10482         struct btrfs_root *root1;
10483         struct btrfs_root *root;
10484         u64 objectid;
10485         u8 level;
10486
10487         root = fs_info->fs_root;
10488         dev_cache = RB_ROOT;
10489         cache_tree_init(&chunk_cache);
10490         block_group_tree_init(&block_group_cache);
10491         device_extent_tree_init(&dev_extent_cache);
10492
10493         cache_tree_init(&extent_cache);
10494         cache_tree_init(&seen);
10495         cache_tree_init(&pending);
10496         cache_tree_init(&nodes);
10497         cache_tree_init(&reada);
10498         cache_tree_init(&corrupt_blocks);
10499         extent_io_tree_init(&excluded_extents);
10500         INIT_LIST_HEAD(&dropping_trees);
10501         INIT_LIST_HEAD(&normal_trees);
10502
10503         if (repair) {
10504                 fs_info->excluded_extents = &excluded_extents;
10505                 fs_info->fsck_extent_cache = &extent_cache;
10506                 fs_info->free_extent_hook = free_extent_hook;
10507                 fs_info->corrupt_blocks = &corrupt_blocks;
10508         }
10509
10510         bits_nr = 1024;
10511         bits = malloc(bits_nr * sizeof(struct block_info));
10512         if (!bits) {
10513                 perror("malloc");
10514                 exit(1);
10515         }
10516
10517         if (ctx.progress_enabled) {
10518                 ctx.tp = TASK_EXTENTS;
10519                 task_start(ctx.info);
10520         }
10521
10522 again:
10523         root1 = fs_info->tree_root;
10524         level = btrfs_header_level(root1->node);
10525         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10526                                     root1->node->start, 0, level, 0, NULL);
10527         if (ret < 0)
10528                 goto out;
10529         root1 = fs_info->chunk_root;
10530         level = btrfs_header_level(root1->node);
10531         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10532                                     root1->node->start, 0, level, 0, NULL);
10533         if (ret < 0)
10534                 goto out;
10535         btrfs_init_path(&path);
10536         key.offset = 0;
10537         key.objectid = 0;
10538         key.type = BTRFS_ROOT_ITEM_KEY;
10539         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10540         if (ret < 0)
10541                 goto out;
10542         while(1) {
10543                 leaf = path.nodes[0];
10544                 slot = path.slots[0];
10545                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10546                         ret = btrfs_next_leaf(root, &path);
10547                         if (ret != 0)
10548                                 break;
10549                         leaf = path.nodes[0];
10550                         slot = path.slots[0];
10551                 }
10552                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10553                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10554                         unsigned long offset;
10555                         u64 last_snapshot;
10556
10557                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10558                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10559                         last_snapshot = btrfs_root_last_snapshot(&ri);
10560                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10561                                 level = btrfs_root_level(&ri);
10562                                 ret = add_root_item_to_list(&normal_trees,
10563                                                 found_key.objectid,
10564                                                 btrfs_root_bytenr(&ri),
10565                                                 last_snapshot, level,
10566                                                 0, NULL);
10567                                 if (ret < 0)
10568                                         goto out;
10569                         } else {
10570                                 level = btrfs_root_level(&ri);
10571                                 objectid = found_key.objectid;
10572                                 btrfs_disk_key_to_cpu(&found_key,
10573                                                       &ri.drop_progress);
10574                                 ret = add_root_item_to_list(&dropping_trees,
10575                                                 objectid,
10576                                                 btrfs_root_bytenr(&ri),
10577                                                 last_snapshot, level,
10578                                                 ri.drop_level, &found_key);
10579                                 if (ret < 0)
10580                                         goto out;
10581                         }
10582                 }
10583                 path.slots[0]++;
10584         }
10585         btrfs_release_path(&path);
10586
10587         /*
10588          * check_block can return -EAGAIN if it fixes something, please keep
10589          * this in mind when dealing with return values from these functions, if
10590          * we get -EAGAIN we want to fall through and restart the loop.
10591          */
10592         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10593                                   &seen, &reada, &nodes, &extent_cache,
10594                                   &chunk_cache, &dev_cache, &block_group_cache,
10595                                   &dev_extent_cache);
10596         if (ret < 0) {
10597                 if (ret == -EAGAIN)
10598                         goto loop;
10599                 goto out;
10600         }
10601         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10602                                   &pending, &seen, &reada, &nodes,
10603                                   &extent_cache, &chunk_cache, &dev_cache,
10604                                   &block_group_cache, &dev_extent_cache);
10605         if (ret < 0) {
10606                 if (ret == -EAGAIN)
10607                         goto loop;
10608                 goto out;
10609         }
10610
10611         ret = check_chunks(&chunk_cache, &block_group_cache,
10612                            &dev_extent_cache, NULL, NULL, NULL, 0);
10613         if (ret) {
10614                 if (ret == -EAGAIN)
10615                         goto loop;
10616                 err = ret;
10617         }
10618
10619         ret = check_extent_refs(root, &extent_cache);
10620         if (ret < 0) {
10621                 if (ret == -EAGAIN)
10622                         goto loop;
10623                 goto out;
10624         }
10625
10626         ret = check_devices(&dev_cache, &dev_extent_cache);
10627         if (ret && err)
10628                 ret = err;
10629
10630 out:
10631         task_stop(ctx.info);
10632         if (repair) {
10633                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10634                 extent_io_tree_cleanup(&excluded_extents);
10635                 fs_info->fsck_extent_cache = NULL;
10636                 fs_info->free_extent_hook = NULL;
10637                 fs_info->corrupt_blocks = NULL;
10638                 fs_info->excluded_extents = NULL;
10639         }
10640         free(bits);
10641         free_chunk_cache_tree(&chunk_cache);
10642         free_device_cache_tree(&dev_cache);
10643         free_block_group_tree(&block_group_cache);
10644         free_device_extent_tree(&dev_extent_cache);
10645         free_extent_cache_tree(&seen);
10646         free_extent_cache_tree(&pending);
10647         free_extent_cache_tree(&reada);
10648         free_extent_cache_tree(&nodes);
10649         free_root_item_list(&normal_trees);
10650         free_root_item_list(&dropping_trees);
10651         return ret;
10652 loop:
10653         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10654         free_extent_cache_tree(&seen);
10655         free_extent_cache_tree(&pending);
10656         free_extent_cache_tree(&reada);
10657         free_extent_cache_tree(&nodes);
10658         free_chunk_cache_tree(&chunk_cache);
10659         free_block_group_tree(&block_group_cache);
10660         free_device_cache_tree(&dev_cache);
10661         free_device_extent_tree(&dev_extent_cache);
10662         free_extent_record_cache(&extent_cache);
10663         free_root_item_list(&normal_trees);
10664         free_root_item_list(&dropping_trees);
10665         extent_io_tree_cleanup(&excluded_extents);
10666         goto again;
10667 }
10668
10669 /*
10670  * Check backrefs of a tree block given by @bytenr or @eb.
10671  *
10672  * @root:       the root containing the @bytenr or @eb
10673  * @eb:         tree block extent buffer, can be NULL
10674  * @bytenr:     bytenr of the tree block to search
10675  * @level:      tree level of the tree block
10676  * @owner:      owner of the tree block
10677  *
10678  * Return >0 for any error found and output error message
10679  * Return 0 for no error found
10680  */
10681 static int check_tree_block_ref(struct btrfs_root *root,
10682                                 struct extent_buffer *eb, u64 bytenr,
10683                                 int level, u64 owner)
10684 {
10685         struct btrfs_key key;
10686         struct btrfs_root *extent_root = root->fs_info->extent_root;
10687         struct btrfs_path path;
10688         struct btrfs_extent_item *ei;
10689         struct btrfs_extent_inline_ref *iref;
10690         struct extent_buffer *leaf;
10691         unsigned long end;
10692         unsigned long ptr;
10693         int slot;
10694         int skinny_level;
10695         int type;
10696         u32 nodesize = root->fs_info->nodesize;
10697         u32 item_size;
10698         u64 offset;
10699         int tree_reloc_root = 0;
10700         int found_ref = 0;
10701         int err = 0;
10702         int ret;
10703
10704         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10705             btrfs_header_bytenr(root->node) == bytenr)
10706                 tree_reloc_root = 1;
10707
10708         btrfs_init_path(&path);
10709         key.objectid = bytenr;
10710         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10711                 key.type = BTRFS_METADATA_ITEM_KEY;
10712         else
10713                 key.type = BTRFS_EXTENT_ITEM_KEY;
10714         key.offset = (u64)-1;
10715
10716         /* Search for the backref in extent tree */
10717         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10718         if (ret < 0) {
10719                 err |= BACKREF_MISSING;
10720                 goto out;
10721         }
10722         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10723         if (ret) {
10724                 err |= BACKREF_MISSING;
10725                 goto out;
10726         }
10727
10728         leaf = path.nodes[0];
10729         slot = path.slots[0];
10730         btrfs_item_key_to_cpu(leaf, &key, slot);
10731
10732         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10733
10734         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10735                 skinny_level = (int)key.offset;
10736                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10737         } else {
10738                 struct btrfs_tree_block_info *info;
10739
10740                 info = (struct btrfs_tree_block_info *)(ei + 1);
10741                 skinny_level = btrfs_tree_block_level(leaf, info);
10742                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10743         }
10744
10745         if (eb) {
10746                 u64 header_gen;
10747                 u64 extent_gen;
10748
10749                 if (!(btrfs_extent_flags(leaf, ei) &
10750                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10751                         error(
10752                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10753                                 key.objectid, nodesize,
10754                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10755                         err = BACKREF_MISMATCH;
10756                 }
10757                 header_gen = btrfs_header_generation(eb);
10758                 extent_gen = btrfs_extent_generation(leaf, ei);
10759                 if (header_gen != extent_gen) {
10760                         error(
10761         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10762                                 key.objectid, nodesize, header_gen,
10763                                 extent_gen);
10764                         err = BACKREF_MISMATCH;
10765                 }
10766                 if (level != skinny_level) {
10767                         error(
10768                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10769                                 key.objectid, nodesize, level, skinny_level);
10770                         err = BACKREF_MISMATCH;
10771                 }
10772                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10773                         error(
10774                         "extent[%llu %u] is referred by other roots than %llu",
10775                                 key.objectid, nodesize, root->objectid);
10776                         err = BACKREF_MISMATCH;
10777                 }
10778         }
10779
10780         /*
10781          * Iterate the extent/metadata item to find the exact backref
10782          */
10783         item_size = btrfs_item_size_nr(leaf, slot);
10784         ptr = (unsigned long)iref;
10785         end = (unsigned long)ei + item_size;
10786         while (ptr < end) {
10787                 iref = (struct btrfs_extent_inline_ref *)ptr;
10788                 type = btrfs_extent_inline_ref_type(leaf, iref);
10789                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10790
10791                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10792                         (offset == root->objectid || offset == owner)) {
10793                         found_ref = 1;
10794                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10795                         /*
10796                          * Backref of tree reloc root points to itself, no need
10797                          * to check backref any more.
10798                          */
10799                         if (tree_reloc_root)
10800                                 found_ref = 1;
10801                         else
10802                         /* Check if the backref points to valid referencer */
10803                                 found_ref = !check_tree_block_ref(root, NULL,
10804                                                 offset, level + 1, owner);
10805                 }
10806
10807                 if (found_ref)
10808                         break;
10809                 ptr += btrfs_extent_inline_ref_size(type);
10810         }
10811
10812         /*
10813          * Inlined extent item doesn't have what we need, check
10814          * TREE_BLOCK_REF_KEY
10815          */
10816         if (!found_ref) {
10817                 btrfs_release_path(&path);
10818                 key.objectid = bytenr;
10819                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10820                 key.offset = root->objectid;
10821
10822                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10823                 if (!ret)
10824                         found_ref = 1;
10825         }
10826         if (!found_ref)
10827                 err |= BACKREF_MISSING;
10828 out:
10829         btrfs_release_path(&path);
10830         if (eb && (err & BACKREF_MISSING))
10831                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10832                         bytenr, nodesize, owner, level);
10833         return err;
10834 }
10835
10836 /*
10837  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10838  *
10839  * Return >0 any error found and output error message
10840  * Return 0 for no error found
10841  */
10842 static int check_extent_data_item(struct btrfs_root *root,
10843                                   struct extent_buffer *eb, int slot)
10844 {
10845         struct btrfs_file_extent_item *fi;
10846         struct btrfs_path path;
10847         struct btrfs_root *extent_root = root->fs_info->extent_root;
10848         struct btrfs_key fi_key;
10849         struct btrfs_key dbref_key;
10850         struct extent_buffer *leaf;
10851         struct btrfs_extent_item *ei;
10852         struct btrfs_extent_inline_ref *iref;
10853         struct btrfs_extent_data_ref *dref;
10854         u64 owner;
10855         u64 disk_bytenr;
10856         u64 disk_num_bytes;
10857         u64 extent_num_bytes;
10858         u64 extent_flags;
10859         u32 item_size;
10860         unsigned long end;
10861         unsigned long ptr;
10862         int type;
10863         u64 ref_root;
10864         int found_dbackref = 0;
10865         int err = 0;
10866         int ret;
10867
10868         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10869         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10870
10871         /* Nothing to check for hole and inline data extents */
10872         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10873             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10874                 return 0;
10875
10876         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10877         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10878         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10879
10880         /* Check unaligned disk_num_bytes and num_bytes */
10881         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10882                 error(
10883 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10884                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10885                         root->fs_info->sectorsize);
10886                 err |= BYTES_UNALIGNED;
10887         } else {
10888                 data_bytes_allocated += disk_num_bytes;
10889         }
10890         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10891                 error(
10892 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10893                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10894                         root->fs_info->sectorsize);
10895                 err |= BYTES_UNALIGNED;
10896         } else {
10897                 data_bytes_referenced += extent_num_bytes;
10898         }
10899         owner = btrfs_header_owner(eb);
10900
10901         /* Check the extent item of the file extent in extent tree */
10902         btrfs_init_path(&path);
10903         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10904         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10905         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10906
10907         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10908         if (ret)
10909                 goto out;
10910
10911         leaf = path.nodes[0];
10912         slot = path.slots[0];
10913         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10914
10915         extent_flags = btrfs_extent_flags(leaf, ei);
10916
10917         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10918                 error(
10919                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10920                     disk_bytenr, disk_num_bytes,
10921                     BTRFS_EXTENT_FLAG_DATA);
10922                 err |= BACKREF_MISMATCH;
10923         }
10924
10925         /* Check data backref inside that extent item */
10926         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10927         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10928         ptr = (unsigned long)iref;
10929         end = (unsigned long)ei + item_size;
10930         while (ptr < end) {
10931                 iref = (struct btrfs_extent_inline_ref *)ptr;
10932                 type = btrfs_extent_inline_ref_type(leaf, iref);
10933                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10934
10935                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10936                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10937                         if (ref_root == owner || ref_root == root->objectid)
10938                                 found_dbackref = 1;
10939                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10940                         found_dbackref = !check_tree_block_ref(root, NULL,
10941                                 btrfs_extent_inline_ref_offset(leaf, iref),
10942                                 0, owner);
10943                 }
10944
10945                 if (found_dbackref)
10946                         break;
10947                 ptr += btrfs_extent_inline_ref_size(type);
10948         }
10949
10950         if (!found_dbackref) {
10951                 btrfs_release_path(&path);
10952
10953                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10954                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10955                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10956                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10957                                 fi_key.objectid, fi_key.offset);
10958
10959                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10960                                         &dbref_key, &path, 0, 0);
10961                 if (!ret) {
10962                         found_dbackref = 1;
10963                         goto out;
10964                 }
10965
10966                 btrfs_release_path(&path);
10967
10968                 /*
10969                  * Neither inlined nor EXTENT_DATA_REF found, try
10970                  * SHARED_DATA_REF as last chance.
10971                  */
10972                 dbref_key.objectid = disk_bytenr;
10973                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10974                 dbref_key.offset = eb->start;
10975
10976                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10977                                         &dbref_key, &path, 0, 0);
10978                 if (!ret) {
10979                         found_dbackref = 1;
10980                         goto out;
10981                 }
10982         }
10983
10984 out:
10985         if (!found_dbackref)
10986                 err |= BACKREF_MISSING;
10987         btrfs_release_path(&path);
10988         if (err & BACKREF_MISSING) {
10989                 error("data extent[%llu %llu] backref lost",
10990                       disk_bytenr, disk_num_bytes);
10991         }
10992         return err;
10993 }
10994
10995 /*
10996  * Get real tree block level for the case like shared block
10997  * Return >= 0 as tree level
10998  * Return <0 for error
10999  */
11000 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11001 {
11002         struct extent_buffer *eb;
11003         struct btrfs_path path;
11004         struct btrfs_key key;
11005         struct btrfs_extent_item *ei;
11006         u64 flags;
11007         u64 transid;
11008         u8 backref_level;
11009         u8 header_level;
11010         int ret;
11011
11012         /* Search extent tree for extent generation and level */
11013         key.objectid = bytenr;
11014         key.type = BTRFS_METADATA_ITEM_KEY;
11015         key.offset = (u64)-1;
11016
11017         btrfs_init_path(&path);
11018         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11019         if (ret < 0)
11020                 goto release_out;
11021         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11022         if (ret < 0)
11023                 goto release_out;
11024         if (ret > 0) {
11025                 ret = -ENOENT;
11026                 goto release_out;
11027         }
11028
11029         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11030         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11031                             struct btrfs_extent_item);
11032         flags = btrfs_extent_flags(path.nodes[0], ei);
11033         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11034                 ret = -ENOENT;
11035                 goto release_out;
11036         }
11037
11038         /* Get transid for later read_tree_block() check */
11039         transid = btrfs_extent_generation(path.nodes[0], ei);
11040
11041         /* Get backref level as one source */
11042         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11043                 backref_level = key.offset;
11044         } else {
11045                 struct btrfs_tree_block_info *info;
11046
11047                 info = (struct btrfs_tree_block_info *)(ei + 1);
11048                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11049         }
11050         btrfs_release_path(&path);
11051
11052         /* Get level from tree block as an alternative source */
11053         eb = read_tree_block(fs_info, bytenr, transid);
11054         if (!extent_buffer_uptodate(eb)) {
11055                 free_extent_buffer(eb);
11056                 return -EIO;
11057         }
11058         header_level = btrfs_header_level(eb);
11059         free_extent_buffer(eb);
11060
11061         if (header_level != backref_level)
11062                 return -EIO;
11063         return header_level;
11064
11065 release_out:
11066         btrfs_release_path(&path);
11067         return ret;
11068 }
11069
11070 /*
11071  * Check if a tree block backref is valid (points to a valid tree block)
11072  * if level == -1, level will be resolved
11073  * Return >0 for any error found and print error message
11074  */
11075 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11076                                     u64 bytenr, int level)
11077 {
11078         struct btrfs_root *root;
11079         struct btrfs_key key;
11080         struct btrfs_path path;
11081         struct extent_buffer *eb;
11082         struct extent_buffer *node;
11083         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11084         int err = 0;
11085         int ret;
11086
11087         /* Query level for level == -1 special case */
11088         if (level == -1)
11089                 level = query_tree_block_level(fs_info, bytenr);
11090         if (level < 0) {
11091                 err |= REFERENCER_MISSING;
11092                 goto out;
11093         }
11094
11095         key.objectid = root_id;
11096         key.type = BTRFS_ROOT_ITEM_KEY;
11097         key.offset = (u64)-1;
11098
11099         root = btrfs_read_fs_root(fs_info, &key);
11100         if (IS_ERR(root)) {
11101                 err |= REFERENCER_MISSING;
11102                 goto out;
11103         }
11104
11105         /* Read out the tree block to get item/node key */
11106         eb = read_tree_block(fs_info, bytenr, 0);
11107         if (!extent_buffer_uptodate(eb)) {
11108                 err |= REFERENCER_MISSING;
11109                 free_extent_buffer(eb);
11110                 goto out;
11111         }
11112
11113         /* Empty tree, no need to check key */
11114         if (!btrfs_header_nritems(eb) && !level) {
11115                 free_extent_buffer(eb);
11116                 goto out;
11117         }
11118
11119         if (level)
11120                 btrfs_node_key_to_cpu(eb, &key, 0);
11121         else
11122                 btrfs_item_key_to_cpu(eb, &key, 0);
11123
11124         free_extent_buffer(eb);
11125
11126         btrfs_init_path(&path);
11127         path.lowest_level = level;
11128         /* Search with the first key, to ensure we can reach it */
11129         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11130         if (ret < 0) {
11131                 err |= REFERENCER_MISSING;
11132                 goto release_out;
11133         }
11134
11135         node = path.nodes[level];
11136         if (btrfs_header_bytenr(node) != bytenr) {
11137                 error(
11138         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11139                         bytenr, nodesize, bytenr,
11140                         btrfs_header_bytenr(node));
11141                 err |= REFERENCER_MISMATCH;
11142         }
11143         if (btrfs_header_level(node) != level) {
11144                 error(
11145         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11146                         bytenr, nodesize, level,
11147                         btrfs_header_level(node));
11148                 err |= REFERENCER_MISMATCH;
11149         }
11150
11151 release_out:
11152         btrfs_release_path(&path);
11153 out:
11154         if (err & REFERENCER_MISSING) {
11155                 if (level < 0)
11156                         error("extent [%llu %d] lost referencer (owner: %llu)",
11157                                 bytenr, nodesize, root_id);
11158                 else
11159                         error(
11160                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11161                                 bytenr, nodesize, root_id, level);
11162         }
11163
11164         return err;
11165 }
11166
11167 /*
11168  * Check if tree block @eb is tree reloc root.
11169  * Return 0 if it's not or any problem happens
11170  * Return 1 if it's a tree reloc root
11171  */
11172 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11173                                  struct extent_buffer *eb)
11174 {
11175         struct btrfs_root *tree_reloc_root;
11176         struct btrfs_key key;
11177         u64 bytenr = btrfs_header_bytenr(eb);
11178         u64 owner = btrfs_header_owner(eb);
11179         int ret = 0;
11180
11181         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11182         key.offset = owner;
11183         key.type = BTRFS_ROOT_ITEM_KEY;
11184
11185         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11186         if (IS_ERR(tree_reloc_root))
11187                 return 0;
11188
11189         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11190                 ret = 1;
11191         btrfs_free_fs_root(tree_reloc_root);
11192         return ret;
11193 }
11194
11195 /*
11196  * Check referencer for shared block backref
11197  * If level == -1, this function will resolve the level.
11198  */
11199 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11200                                      u64 parent, u64 bytenr, int level)
11201 {
11202         struct extent_buffer *eb;
11203         u32 nr;
11204         int found_parent = 0;
11205         int i;
11206
11207         eb = read_tree_block(fs_info, parent, 0);
11208         if (!extent_buffer_uptodate(eb))
11209                 goto out;
11210
11211         if (level == -1)
11212                 level = query_tree_block_level(fs_info, bytenr);
11213         if (level < 0)
11214                 goto out;
11215
11216         /* It's possible it's a tree reloc root */
11217         if (parent == bytenr) {
11218                 if (is_tree_reloc_root(fs_info, eb))
11219                         found_parent = 1;
11220                 goto out;
11221         }
11222
11223         if (level + 1 != btrfs_header_level(eb))
11224                 goto out;
11225
11226         nr = btrfs_header_nritems(eb);
11227         for (i = 0; i < nr; i++) {
11228                 if (bytenr == btrfs_node_blockptr(eb, i)) {
11229                         found_parent = 1;
11230                         break;
11231                 }
11232         }
11233 out:
11234         free_extent_buffer(eb);
11235         if (!found_parent) {
11236                 error(
11237         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11238                         bytenr, fs_info->nodesize, parent, level);
11239                 return REFERENCER_MISSING;
11240         }
11241         return 0;
11242 }
11243
11244 /*
11245  * Check referencer for normal (inlined) data ref
11246  * If len == 0, it will be resolved by searching in extent tree
11247  */
11248 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11249                                      u64 root_id, u64 objectid, u64 offset,
11250                                      u64 bytenr, u64 len, u32 count)
11251 {
11252         struct btrfs_root *root;
11253         struct btrfs_root *extent_root = fs_info->extent_root;
11254         struct btrfs_key key;
11255         struct btrfs_path path;
11256         struct extent_buffer *leaf;
11257         struct btrfs_file_extent_item *fi;
11258         u32 found_count = 0;
11259         int slot;
11260         int ret = 0;
11261
11262         if (!len) {
11263                 key.objectid = bytenr;
11264                 key.type = BTRFS_EXTENT_ITEM_KEY;
11265                 key.offset = (u64)-1;
11266
11267                 btrfs_init_path(&path);
11268                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11269                 if (ret < 0)
11270                         goto out;
11271                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11272                 if (ret)
11273                         goto out;
11274                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11275                 if (key.objectid != bytenr ||
11276                     key.type != BTRFS_EXTENT_ITEM_KEY)
11277                         goto out;
11278                 len = key.offset;
11279                 btrfs_release_path(&path);
11280         }
11281         key.objectid = root_id;
11282         key.type = BTRFS_ROOT_ITEM_KEY;
11283         key.offset = (u64)-1;
11284         btrfs_init_path(&path);
11285
11286         root = btrfs_read_fs_root(fs_info, &key);
11287         if (IS_ERR(root))
11288                 goto out;
11289
11290         key.objectid = objectid;
11291         key.type = BTRFS_EXTENT_DATA_KEY;
11292         /*
11293          * It can be nasty as data backref offset is
11294          * file offset - file extent offset, which is smaller or
11295          * equal to original backref offset.  The only special case is
11296          * overflow.  So we need to special check and do further search.
11297          */
11298         key.offset = offset & (1ULL << 63) ? 0 : offset;
11299
11300         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11301         if (ret < 0)
11302                 goto out;
11303
11304         /*
11305          * Search afterwards to get correct one
11306          * NOTE: As we must do a comprehensive check on the data backref to
11307          * make sure the dref count also matches, we must iterate all file
11308          * extents for that inode.
11309          */
11310         while (1) {
11311                 leaf = path.nodes[0];
11312                 slot = path.slots[0];
11313
11314                 if (slot >= btrfs_header_nritems(leaf))
11315                         goto next;
11316                 btrfs_item_key_to_cpu(leaf, &key, slot);
11317                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11318                         break;
11319                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11320                 /*
11321                  * Except normal disk bytenr and disk num bytes, we still
11322                  * need to do extra check on dbackref offset as
11323                  * dbackref offset = file_offset - file_extent_offset
11324                  */
11325                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11326                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11327                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11328                     offset)
11329                         found_count++;
11330
11331 next:
11332                 ret = btrfs_next_item(root, &path);
11333                 if (ret)
11334                         break;
11335         }
11336 out:
11337         btrfs_release_path(&path);
11338         if (found_count != count) {
11339                 error(
11340 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11341                         bytenr, len, root_id, objectid, offset, count, found_count);
11342                 return REFERENCER_MISSING;
11343         }
11344         return 0;
11345 }
11346
11347 /*
11348  * Check if the referencer of a shared data backref exists
11349  */
11350 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11351                                      u64 parent, u64 bytenr)
11352 {
11353         struct extent_buffer *eb;
11354         struct btrfs_key key;
11355         struct btrfs_file_extent_item *fi;
11356         u32 nr;
11357         int found_parent = 0;
11358         int i;
11359
11360         eb = read_tree_block(fs_info, parent, 0);
11361         if (!extent_buffer_uptodate(eb))
11362                 goto out;
11363
11364         nr = btrfs_header_nritems(eb);
11365         for (i = 0; i < nr; i++) {
11366                 btrfs_item_key_to_cpu(eb, &key, i);
11367                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11368                         continue;
11369
11370                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11371                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11372                         continue;
11373
11374                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11375                         found_parent = 1;
11376                         break;
11377                 }
11378         }
11379
11380 out:
11381         free_extent_buffer(eb);
11382         if (!found_parent) {
11383                 error("shared extent %llu referencer lost (parent: %llu)",
11384                         bytenr, parent);
11385                 return REFERENCER_MISSING;
11386         }
11387         return 0;
11388 }
11389
11390 /*
11391  * This function will check a given extent item, including its backref and
11392  * itself (like crossing stripe boundary and type)
11393  *
11394  * Since we don't use extent_record anymore, introduce new error bit
11395  */
11396 static int check_extent_item(struct btrfs_fs_info *fs_info,
11397                              struct extent_buffer *eb, int slot)
11398 {
11399         struct btrfs_extent_item *ei;
11400         struct btrfs_extent_inline_ref *iref;
11401         struct btrfs_extent_data_ref *dref;
11402         unsigned long end;
11403         unsigned long ptr;
11404         int type;
11405         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11406         u32 item_size = btrfs_item_size_nr(eb, slot);
11407         u64 flags;
11408         u64 offset;
11409         int metadata = 0;
11410         int level;
11411         struct btrfs_key key;
11412         int ret;
11413         int err = 0;
11414
11415         btrfs_item_key_to_cpu(eb, &key, slot);
11416         if (key.type == BTRFS_EXTENT_ITEM_KEY)
11417                 bytes_used += key.offset;
11418         else
11419                 bytes_used += nodesize;
11420
11421         if (item_size < sizeof(*ei)) {
11422                 /*
11423                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11424                  * old thing when on disk format is still un-determined.
11425                  * No need to care about it anymore
11426                  */
11427                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11428                 return -ENOTTY;
11429         }
11430
11431         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11432         flags = btrfs_extent_flags(eb, ei);
11433
11434         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11435                 metadata = 1;
11436         if (metadata && check_crossing_stripes(global_info, key.objectid,
11437                                                eb->len)) {
11438                 error("bad metadata [%llu, %llu) crossing stripe boundary",
11439                       key.objectid, key.objectid + nodesize);
11440                 err |= CROSSING_STRIPE_BOUNDARY;
11441         }
11442
11443         ptr = (unsigned long)(ei + 1);
11444
11445         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11446                 /* Old EXTENT_ITEM metadata */
11447                 struct btrfs_tree_block_info *info;
11448
11449                 info = (struct btrfs_tree_block_info *)ptr;
11450                 level = btrfs_tree_block_level(eb, info);
11451                 ptr += sizeof(struct btrfs_tree_block_info);
11452         } else {
11453                 /* New METADATA_ITEM */
11454                 level = key.offset;
11455         }
11456         end = (unsigned long)ei + item_size;
11457
11458 next:
11459         /* Reached extent item end normally */
11460         if (ptr == end)
11461                 goto out;
11462
11463         /* Beyond extent item end, wrong item size */
11464         if (ptr > end) {
11465                 err |= ITEM_SIZE_MISMATCH;
11466                 error("extent item at bytenr %llu slot %d has wrong size",
11467                         eb->start, slot);
11468                 goto out;
11469         }
11470
11471         /* Now check every backref in this extent item */
11472         iref = (struct btrfs_extent_inline_ref *)ptr;
11473         type = btrfs_extent_inline_ref_type(eb, iref);
11474         offset = btrfs_extent_inline_ref_offset(eb, iref);
11475         switch (type) {
11476         case BTRFS_TREE_BLOCK_REF_KEY:
11477                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11478                                                level);
11479                 err |= ret;
11480                 break;
11481         case BTRFS_SHARED_BLOCK_REF_KEY:
11482                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11483                                                  level);
11484                 err |= ret;
11485                 break;
11486         case BTRFS_EXTENT_DATA_REF_KEY:
11487                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11488                 ret = check_extent_data_backref(fs_info,
11489                                 btrfs_extent_data_ref_root(eb, dref),
11490                                 btrfs_extent_data_ref_objectid(eb, dref),
11491                                 btrfs_extent_data_ref_offset(eb, dref),
11492                                 key.objectid, key.offset,
11493                                 btrfs_extent_data_ref_count(eb, dref));
11494                 err |= ret;
11495                 break;
11496         case BTRFS_SHARED_DATA_REF_KEY:
11497                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11498                 err |= ret;
11499                 break;
11500         default:
11501                 error("extent[%llu %d %llu] has unknown ref type: %d",
11502                         key.objectid, key.type, key.offset, type);
11503                 err |= UNKNOWN_TYPE;
11504                 goto out;
11505         }
11506
11507         ptr += btrfs_extent_inline_ref_size(type);
11508         goto next;
11509
11510 out:
11511         return err;
11512 }
11513
11514 /*
11515  * Check if a dev extent item is referred correctly by its chunk
11516  */
11517 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11518                                  struct extent_buffer *eb, int slot)
11519 {
11520         struct btrfs_root *chunk_root = fs_info->chunk_root;
11521         struct btrfs_dev_extent *ptr;
11522         struct btrfs_path path;
11523         struct btrfs_key chunk_key;
11524         struct btrfs_key devext_key;
11525         struct btrfs_chunk *chunk;
11526         struct extent_buffer *l;
11527         int num_stripes;
11528         u64 length;
11529         int i;
11530         int found_chunk = 0;
11531         int ret;
11532
11533         btrfs_item_key_to_cpu(eb, &devext_key, slot);
11534         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11535         length = btrfs_dev_extent_length(eb, ptr);
11536
11537         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11538         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11539         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11540
11541         btrfs_init_path(&path);
11542         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11543         if (ret)
11544                 goto out;
11545
11546         l = path.nodes[0];
11547         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11548         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11549                                       chunk_key.offset);
11550         if (ret < 0)
11551                 goto out;
11552
11553         if (btrfs_stripe_length(fs_info, l, chunk) != length)
11554                 goto out;
11555
11556         num_stripes = btrfs_chunk_num_stripes(l, chunk);
11557         for (i = 0; i < num_stripes; i++) {
11558                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11559                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11560
11561                 if (devid == devext_key.objectid &&
11562                     offset == devext_key.offset) {
11563                         found_chunk = 1;
11564                         break;
11565                 }
11566         }
11567 out:
11568         btrfs_release_path(&path);
11569         if (!found_chunk) {
11570                 error(
11571                 "device extent[%llu, %llu, %llu] did not find the related chunk",
11572                         devext_key.objectid, devext_key.offset, length);
11573                 return REFERENCER_MISSING;
11574         }
11575         return 0;
11576 }
11577
11578 /*
11579  * Check if the used space is correct with the dev item
11580  */
11581 static int check_dev_item(struct btrfs_fs_info *fs_info,
11582                           struct extent_buffer *eb, int slot)
11583 {
11584         struct btrfs_root *dev_root = fs_info->dev_root;
11585         struct btrfs_dev_item *dev_item;
11586         struct btrfs_path path;
11587         struct btrfs_key key;
11588         struct btrfs_dev_extent *ptr;
11589         u64 dev_id;
11590         u64 used;
11591         u64 total = 0;
11592         int ret;
11593
11594         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11595         dev_id = btrfs_device_id(eb, dev_item);
11596         used = btrfs_device_bytes_used(eb, dev_item);
11597
11598         key.objectid = dev_id;
11599         key.type = BTRFS_DEV_EXTENT_KEY;
11600         key.offset = 0;
11601
11602         btrfs_init_path(&path);
11603         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11604         if (ret < 0) {
11605                 btrfs_item_key_to_cpu(eb, &key, slot);
11606                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11607                         key.objectid, key.type, key.offset);
11608                 btrfs_release_path(&path);
11609                 return REFERENCER_MISSING;
11610         }
11611
11612         /* Iterate dev_extents to calculate the used space of a device */
11613         while (1) {
11614                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11615                         goto next;
11616
11617                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11618                 if (key.objectid > dev_id)
11619                         break;
11620                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11621                         goto next;
11622
11623                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11624                                      struct btrfs_dev_extent);
11625                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11626 next:
11627                 ret = btrfs_next_item(dev_root, &path);
11628                 if (ret)
11629                         break;
11630         }
11631         btrfs_release_path(&path);
11632
11633         if (used != total) {
11634                 btrfs_item_key_to_cpu(eb, &key, slot);
11635                 error(
11636 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11637                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11638                         BTRFS_DEV_EXTENT_KEY, dev_id);
11639                 return ACCOUNTING_MISMATCH;
11640         }
11641         return 0;
11642 }
11643
11644 /*
11645  * Check a block group item with its referener (chunk) and its used space
11646  * with extent/metadata item
11647  */
11648 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11649                                   struct extent_buffer *eb, int slot)
11650 {
11651         struct btrfs_root *extent_root = fs_info->extent_root;
11652         struct btrfs_root *chunk_root = fs_info->chunk_root;
11653         struct btrfs_block_group_item *bi;
11654         struct btrfs_block_group_item bg_item;
11655         struct btrfs_path path;
11656         struct btrfs_key bg_key;
11657         struct btrfs_key chunk_key;
11658         struct btrfs_key extent_key;
11659         struct btrfs_chunk *chunk;
11660         struct extent_buffer *leaf;
11661         struct btrfs_extent_item *ei;
11662         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11663         u64 flags;
11664         u64 bg_flags;
11665         u64 used;
11666         u64 total = 0;
11667         int ret;
11668         int err = 0;
11669
11670         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11671         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11672         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11673         used = btrfs_block_group_used(&bg_item);
11674         bg_flags = btrfs_block_group_flags(&bg_item);
11675
11676         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11677         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11678         chunk_key.offset = bg_key.objectid;
11679
11680         btrfs_init_path(&path);
11681         /* Search for the referencer chunk */
11682         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11683         if (ret) {
11684                 error(
11685                 "block group[%llu %llu] did not find the related chunk item",
11686                         bg_key.objectid, bg_key.offset);
11687                 err |= REFERENCER_MISSING;
11688         } else {
11689                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11690                                         struct btrfs_chunk);
11691                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11692                                                 bg_key.offset) {
11693                         error(
11694         "block group[%llu %llu] related chunk item length does not match",
11695                                 bg_key.objectid, bg_key.offset);
11696                         err |= REFERENCER_MISMATCH;
11697                 }
11698         }
11699         btrfs_release_path(&path);
11700
11701         /* Search from the block group bytenr */
11702         extent_key.objectid = bg_key.objectid;
11703         extent_key.type = 0;
11704         extent_key.offset = 0;
11705
11706         btrfs_init_path(&path);
11707         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11708         if (ret < 0)
11709                 goto out;
11710
11711         /* Iterate extent tree to account used space */
11712         while (1) {
11713                 leaf = path.nodes[0];
11714
11715                 /* Search slot can point to the last item beyond leaf nritems */
11716                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11717                         goto next;
11718
11719                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11720                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11721                         break;
11722
11723                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11724                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11725                         goto next;
11726                 if (extent_key.objectid < bg_key.objectid)
11727                         goto next;
11728
11729                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11730                         total += nodesize;
11731                 else
11732                         total += extent_key.offset;
11733
11734                 ei = btrfs_item_ptr(leaf, path.slots[0],
11735                                     struct btrfs_extent_item);
11736                 flags = btrfs_extent_flags(leaf, ei);
11737                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11738                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11739                                 error(
11740                         "bad extent[%llu, %llu) type mismatch with chunk",
11741                                         extent_key.objectid,
11742                                         extent_key.objectid + extent_key.offset);
11743                                 err |= CHUNK_TYPE_MISMATCH;
11744                         }
11745                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11746                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11747                                     BTRFS_BLOCK_GROUP_METADATA))) {
11748                                 error(
11749                         "bad extent[%llu, %llu) type mismatch with chunk",
11750                                         extent_key.objectid,
11751                                         extent_key.objectid + nodesize);
11752                                 err |= CHUNK_TYPE_MISMATCH;
11753                         }
11754                 }
11755 next:
11756                 ret = btrfs_next_item(extent_root, &path);
11757                 if (ret)
11758                         break;
11759         }
11760
11761 out:
11762         btrfs_release_path(&path);
11763
11764         if (total != used) {
11765                 error(
11766                 "block group[%llu %llu] used %llu but extent items used %llu",
11767                         bg_key.objectid, bg_key.offset, used, total);
11768                 err |= ACCOUNTING_MISMATCH;
11769         }
11770         return err;
11771 }
11772
11773 /*
11774  * Check a chunk item.
11775  * Including checking all referred dev_extents and block group
11776  */
11777 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11778                             struct extent_buffer *eb, int slot)
11779 {
11780         struct btrfs_root *extent_root = fs_info->extent_root;
11781         struct btrfs_root *dev_root = fs_info->dev_root;
11782         struct btrfs_path path;
11783         struct btrfs_key chunk_key;
11784         struct btrfs_key bg_key;
11785         struct btrfs_key devext_key;
11786         struct btrfs_chunk *chunk;
11787         struct extent_buffer *leaf;
11788         struct btrfs_block_group_item *bi;
11789         struct btrfs_block_group_item bg_item;
11790         struct btrfs_dev_extent *ptr;
11791         u64 length;
11792         u64 chunk_end;
11793         u64 stripe_len;
11794         u64 type;
11795         int num_stripes;
11796         u64 offset;
11797         u64 objectid;
11798         int i;
11799         int ret;
11800         int err = 0;
11801
11802         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11803         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11804         length = btrfs_chunk_length(eb, chunk);
11805         chunk_end = chunk_key.offset + length;
11806         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11807                                       chunk_key.offset);
11808         if (ret < 0) {
11809                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11810                         chunk_end);
11811                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11812                 goto out;
11813         }
11814         type = btrfs_chunk_type(eb, chunk);
11815
11816         bg_key.objectid = chunk_key.offset;
11817         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11818         bg_key.offset = length;
11819
11820         btrfs_init_path(&path);
11821         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11822         if (ret) {
11823                 error(
11824                 "chunk[%llu %llu) did not find the related block group item",
11825                         chunk_key.offset, chunk_end);
11826                 err |= REFERENCER_MISSING;
11827         } else{
11828                 leaf = path.nodes[0];
11829                 bi = btrfs_item_ptr(leaf, path.slots[0],
11830                                     struct btrfs_block_group_item);
11831                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11832                                    sizeof(bg_item));
11833                 if (btrfs_block_group_flags(&bg_item) != type) {
11834                         error(
11835 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11836                                 chunk_key.offset, chunk_end, type,
11837                                 btrfs_block_group_flags(&bg_item));
11838                         err |= REFERENCER_MISSING;
11839                 }
11840         }
11841
11842         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11843         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11844         for (i = 0; i < num_stripes; i++) {
11845                 btrfs_release_path(&path);
11846                 btrfs_init_path(&path);
11847                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11848                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11849                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11850
11851                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11852                                         0, 0);
11853                 if (ret)
11854                         goto not_match_dev;
11855
11856                 leaf = path.nodes[0];
11857                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11858                                      struct btrfs_dev_extent);
11859                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11860                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11861                 if (objectid != chunk_key.objectid ||
11862                     offset != chunk_key.offset ||
11863                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11864                         goto not_match_dev;
11865                 continue;
11866 not_match_dev:
11867                 err |= BACKREF_MISSING;
11868                 error(
11869                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11870                         chunk_key.objectid, chunk_end, i);
11871                 continue;
11872         }
11873         btrfs_release_path(&path);
11874 out:
11875         return err;
11876 }
11877
11878 /*
11879  * Main entry function to check known items and update related accounting info
11880  */
11881 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11882 {
11883         struct btrfs_fs_info *fs_info = root->fs_info;
11884         struct btrfs_key key;
11885         int slot = 0;
11886         int type;
11887         struct btrfs_extent_data_ref *dref;
11888         int ret;
11889         int err = 0;
11890
11891 next:
11892         btrfs_item_key_to_cpu(eb, &key, slot);
11893         type = key.type;
11894
11895         switch (type) {
11896         case BTRFS_EXTENT_DATA_KEY:
11897                 ret = check_extent_data_item(root, eb, slot);
11898                 err |= ret;
11899                 break;
11900         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11901                 ret = check_block_group_item(fs_info, eb, slot);
11902                 err |= ret;
11903                 break;
11904         case BTRFS_DEV_ITEM_KEY:
11905                 ret = check_dev_item(fs_info, eb, slot);
11906                 err |= ret;
11907                 break;
11908         case BTRFS_CHUNK_ITEM_KEY:
11909                 ret = check_chunk_item(fs_info, eb, slot);
11910                 err |= ret;
11911                 break;
11912         case BTRFS_DEV_EXTENT_KEY:
11913                 ret = check_dev_extent_item(fs_info, eb, slot);
11914                 err |= ret;
11915                 break;
11916         case BTRFS_EXTENT_ITEM_KEY:
11917         case BTRFS_METADATA_ITEM_KEY:
11918                 ret = check_extent_item(fs_info, eb, slot);
11919                 err |= ret;
11920                 break;
11921         case BTRFS_EXTENT_CSUM_KEY:
11922                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11923                 break;
11924         case BTRFS_TREE_BLOCK_REF_KEY:
11925                 ret = check_tree_block_backref(fs_info, key.offset,
11926                                                key.objectid, -1);
11927                 err |= ret;
11928                 break;
11929         case BTRFS_EXTENT_DATA_REF_KEY:
11930                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11931                 ret = check_extent_data_backref(fs_info,
11932                                 btrfs_extent_data_ref_root(eb, dref),
11933                                 btrfs_extent_data_ref_objectid(eb, dref),
11934                                 btrfs_extent_data_ref_offset(eb, dref),
11935                                 key.objectid, 0,
11936                                 btrfs_extent_data_ref_count(eb, dref));
11937                 err |= ret;
11938                 break;
11939         case BTRFS_SHARED_BLOCK_REF_KEY:
11940                 ret = check_shared_block_backref(fs_info, key.offset,
11941                                                  key.objectid, -1);
11942                 err |= ret;
11943                 break;
11944         case BTRFS_SHARED_DATA_REF_KEY:
11945                 ret = check_shared_data_backref(fs_info, key.offset,
11946                                                 key.objectid);
11947                 err |= ret;
11948                 break;
11949         default:
11950                 break;
11951         }
11952
11953         if (++slot < btrfs_header_nritems(eb))
11954                 goto next;
11955
11956         return err;
11957 }
11958
11959 /*
11960  * Helper function for later fs/subvol tree check.  To determine if a tree
11961  * block should be checked.
11962  * This function will ensure only the direct referencer with lowest rootid to
11963  * check a fs/subvolume tree block.
11964  *
11965  * Backref check at extent tree would detect errors like missing subvolume
11966  * tree, so we can do aggressive check to reduce duplicated checks.
11967  */
11968 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11969 {
11970         struct btrfs_root *extent_root = root->fs_info->extent_root;
11971         struct btrfs_key key;
11972         struct btrfs_path path;
11973         struct extent_buffer *leaf;
11974         int slot;
11975         struct btrfs_extent_item *ei;
11976         unsigned long ptr;
11977         unsigned long end;
11978         int type;
11979         u32 item_size;
11980         u64 offset;
11981         struct btrfs_extent_inline_ref *iref;
11982         int ret;
11983
11984         btrfs_init_path(&path);
11985         key.objectid = btrfs_header_bytenr(eb);
11986         key.type = BTRFS_METADATA_ITEM_KEY;
11987         key.offset = (u64)-1;
11988
11989         /*
11990          * Any failure in backref resolving means we can't determine
11991          * whom the tree block belongs to.
11992          * So in that case, we need to check that tree block
11993          */
11994         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11995         if (ret < 0)
11996                 goto need_check;
11997
11998         ret = btrfs_previous_extent_item(extent_root, &path,
11999                                          btrfs_header_bytenr(eb));
12000         if (ret)
12001                 goto need_check;
12002
12003         leaf = path.nodes[0];
12004         slot = path.slots[0];
12005         btrfs_item_key_to_cpu(leaf, &key, slot);
12006         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12007
12008         if (key.type == BTRFS_METADATA_ITEM_KEY) {
12009                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12010         } else {
12011                 struct btrfs_tree_block_info *info;
12012
12013                 info = (struct btrfs_tree_block_info *)(ei + 1);
12014                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
12015         }
12016
12017         item_size = btrfs_item_size_nr(leaf, slot);
12018         ptr = (unsigned long)iref;
12019         end = (unsigned long)ei + item_size;
12020         while (ptr < end) {
12021                 iref = (struct btrfs_extent_inline_ref *)ptr;
12022                 type = btrfs_extent_inline_ref_type(leaf, iref);
12023                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
12024
12025                 /*
12026                  * We only check the tree block if current root is
12027                  * the lowest referencer of it.
12028                  */
12029                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
12030                     offset < root->objectid) {
12031                         btrfs_release_path(&path);
12032                         return 0;
12033                 }
12034
12035                 ptr += btrfs_extent_inline_ref_size(type);
12036         }
12037         /*
12038          * Normally we should also check keyed tree block ref, but that may be
12039          * very time consuming.  Inlined ref should already make us skip a lot
12040          * of refs now.  So skip search keyed tree block ref.
12041          */
12042
12043 need_check:
12044         btrfs_release_path(&path);
12045         return 1;
12046 }
12047
12048 /*
12049  * Traversal function for tree block. We will do:
12050  * 1) Skip shared fs/subvolume tree blocks
12051  * 2) Update related bytes accounting
12052  * 3) Pre-order traversal
12053  */
12054 static int traverse_tree_block(struct btrfs_root *root,
12055                                 struct extent_buffer *node)
12056 {
12057         struct extent_buffer *eb;
12058         struct btrfs_key key;
12059         struct btrfs_key drop_key;
12060         int level;
12061         u64 nr;
12062         int i;
12063         int err = 0;
12064         int ret;
12065
12066         /*
12067          * Skip shared fs/subvolume tree block, in that case they will
12068          * be checked by referencer with lowest rootid
12069          */
12070         if (is_fstree(root->objectid) && !should_check(root, node))
12071                 return 0;
12072
12073         /* Update bytes accounting */
12074         total_btree_bytes += node->len;
12075         if (fs_root_objectid(btrfs_header_owner(node)))
12076                 total_fs_tree_bytes += node->len;
12077         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
12078                 total_extent_tree_bytes += node->len;
12079
12080         /* pre-order tranversal, check itself first */
12081         level = btrfs_header_level(node);
12082         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
12083                                    btrfs_header_level(node),
12084                                    btrfs_header_owner(node));
12085         err |= ret;
12086         if (err)
12087                 error(
12088         "check %s failed root %llu bytenr %llu level %d, force continue check",
12089                         level ? "node":"leaf", root->objectid,
12090                         btrfs_header_bytenr(node), btrfs_header_level(node));
12091
12092         if (!level) {
12093                 btree_space_waste += btrfs_leaf_free_space(root, node);
12094                 ret = check_leaf_items(root, node);
12095                 err |= ret;
12096                 return err;
12097         }
12098
12099         nr = btrfs_header_nritems(node);
12100         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
12101         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
12102                 sizeof(struct btrfs_key_ptr);
12103
12104         /* Then check all its children */
12105         for (i = 0; i < nr; i++) {
12106                 u64 blocknr = btrfs_node_blockptr(node, i);
12107
12108                 btrfs_node_key_to_cpu(node, &key, i);
12109                 if (level == root->root_item.drop_level &&
12110                     is_dropped_key(&key, &drop_key))
12111                         continue;
12112
12113                 /*
12114                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
12115                  * to call the function itself.
12116                  */
12117                 eb = read_tree_block(root->fs_info, blocknr, 0);
12118                 if (extent_buffer_uptodate(eb)) {
12119                         ret = traverse_tree_block(root, eb);
12120                         err |= ret;
12121                 }
12122                 free_extent_buffer(eb);
12123         }
12124
12125         return err;
12126 }
12127
12128 /*
12129  * Low memory usage version check_chunks_and_extents.
12130  */
12131 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12132 {
12133         struct btrfs_path path;
12134         struct btrfs_key key;
12135         struct btrfs_root *root1;
12136         struct btrfs_root *root;
12137         struct btrfs_root *cur_root;
12138         int err = 0;
12139         int ret;
12140
12141         root = fs_info->fs_root;
12142
12143         root1 = root->fs_info->chunk_root;
12144         ret = traverse_tree_block(root1, root1->node);
12145         err |= ret;
12146
12147         root1 = root->fs_info->tree_root;
12148         ret = traverse_tree_block(root1, root1->node);
12149         err |= ret;
12150
12151         btrfs_init_path(&path);
12152         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12153         key.offset = 0;
12154         key.type = BTRFS_ROOT_ITEM_KEY;
12155
12156         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12157         if (ret) {
12158                 error("cannot find extent treet in tree_root");
12159                 goto out;
12160         }
12161
12162         while (1) {
12163                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12164                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12165                         goto next;
12166                 key.offset = (u64)-1;
12167
12168                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12169                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12170                                         &key);
12171                 else
12172                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
12173                 if (IS_ERR(cur_root) || !cur_root) {
12174                         error("failed to read tree: %lld", key.objectid);
12175                         goto next;
12176                 }
12177
12178                 ret = traverse_tree_block(cur_root, cur_root->node);
12179                 err |= ret;
12180
12181                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12182                         btrfs_free_fs_root(cur_root);
12183 next:
12184                 ret = btrfs_next_item(root1, &path);
12185                 if (ret)
12186                         goto out;
12187         }
12188
12189 out:
12190         btrfs_release_path(&path);
12191         return err;
12192 }
12193
12194 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12195 {
12196         int ret;
12197
12198         if (!ctx.progress_enabled)
12199                 fprintf(stderr, "checking extents\n");
12200         if (check_mode == CHECK_MODE_LOWMEM)
12201                 ret = check_chunks_and_extents_v2(fs_info);
12202         else
12203                 ret = check_chunks_and_extents(fs_info);
12204
12205         return ret;
12206 }
12207
12208 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12209                            struct btrfs_root *root, int overwrite)
12210 {
12211         struct extent_buffer *c;
12212         struct extent_buffer *old = root->node;
12213         int level;
12214         int ret;
12215         struct btrfs_disk_key disk_key = {0,0,0};
12216
12217         level = 0;
12218
12219         if (overwrite) {
12220                 c = old;
12221                 extent_buffer_get(c);
12222                 goto init;
12223         }
12224         c = btrfs_alloc_free_block(trans, root,
12225                                    root->fs_info->nodesize,
12226                                    root->root_key.objectid,
12227                                    &disk_key, level, 0, 0);
12228         if (IS_ERR(c)) {
12229                 c = old;
12230                 extent_buffer_get(c);
12231                 overwrite = 1;
12232         }
12233 init:
12234         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12235         btrfs_set_header_level(c, level);
12236         btrfs_set_header_bytenr(c, c->start);
12237         btrfs_set_header_generation(c, trans->transid);
12238         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12239         btrfs_set_header_owner(c, root->root_key.objectid);
12240
12241         write_extent_buffer(c, root->fs_info->fsid,
12242                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
12243
12244         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12245                             btrfs_header_chunk_tree_uuid(c),
12246                             BTRFS_UUID_SIZE);
12247
12248         btrfs_mark_buffer_dirty(c);
12249         /*
12250          * this case can happen in the following case:
12251          *
12252          * 1.overwrite previous root.
12253          *
12254          * 2.reinit reloc data root, this is because we skip pin
12255          * down reloc data tree before which means we can allocate
12256          * same block bytenr here.
12257          */
12258         if (old->start == c->start) {
12259                 btrfs_set_root_generation(&root->root_item,
12260                                           trans->transid);
12261                 root->root_item.level = btrfs_header_level(root->node);
12262                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12263                                         &root->root_key, &root->root_item);
12264                 if (ret) {
12265                         free_extent_buffer(c);
12266                         return ret;
12267                 }
12268         }
12269         free_extent_buffer(old);
12270         root->node = c;
12271         add_root_to_dirty_list(root);
12272         return 0;
12273 }
12274
12275 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12276                                 struct extent_buffer *eb, int tree_root)
12277 {
12278         struct extent_buffer *tmp;
12279         struct btrfs_root_item *ri;
12280         struct btrfs_key key;
12281         u64 bytenr;
12282         int level = btrfs_header_level(eb);
12283         int nritems;
12284         int ret;
12285         int i;
12286
12287         /*
12288          * If we have pinned this block before, don't pin it again.
12289          * This can not only avoid forever loop with broken filesystem
12290          * but also give us some speedups.
12291          */
12292         if (test_range_bit(&fs_info->pinned_extents, eb->start,
12293                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12294                 return 0;
12295
12296         btrfs_pin_extent(fs_info, eb->start, eb->len);
12297
12298         nritems = btrfs_header_nritems(eb);
12299         for (i = 0; i < nritems; i++) {
12300                 if (level == 0) {
12301                         btrfs_item_key_to_cpu(eb, &key, i);
12302                         if (key.type != BTRFS_ROOT_ITEM_KEY)
12303                                 continue;
12304                         /* Skip the extent root and reloc roots */
12305                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12306                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12307                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12308                                 continue;
12309                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12310                         bytenr = btrfs_disk_root_bytenr(eb, ri);
12311
12312                         /*
12313                          * If at any point we start needing the real root we
12314                          * will have to build a stump root for the root we are
12315                          * in, but for now this doesn't actually use the root so
12316                          * just pass in extent_root.
12317                          */
12318                         tmp = read_tree_block(fs_info, bytenr, 0);
12319                         if (!extent_buffer_uptodate(tmp)) {
12320                                 fprintf(stderr, "Error reading root block\n");
12321                                 return -EIO;
12322                         }
12323                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
12324                         free_extent_buffer(tmp);
12325                         if (ret)
12326                                 return ret;
12327                 } else {
12328                         bytenr = btrfs_node_blockptr(eb, i);
12329
12330                         /* If we aren't the tree root don't read the block */
12331                         if (level == 1 && !tree_root) {
12332                                 btrfs_pin_extent(fs_info, bytenr,
12333                                                 fs_info->nodesize);
12334                                 continue;
12335                         }
12336
12337                         tmp = read_tree_block(fs_info, bytenr, 0);
12338                         if (!extent_buffer_uptodate(tmp)) {
12339                                 fprintf(stderr, "Error reading tree block\n");
12340                                 return -EIO;
12341                         }
12342                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12343                         free_extent_buffer(tmp);
12344                         if (ret)
12345                                 return ret;
12346                 }
12347         }
12348
12349         return 0;
12350 }
12351
12352 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12353 {
12354         int ret;
12355
12356         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12357         if (ret)
12358                 return ret;
12359
12360         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
12361 }
12362
12363 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12364 {
12365         struct btrfs_block_group_cache *cache;
12366         struct btrfs_path path;
12367         struct extent_buffer *leaf;
12368         struct btrfs_chunk *chunk;
12369         struct btrfs_key key;
12370         int ret;
12371         u64 start;
12372
12373         btrfs_init_path(&path);
12374         key.objectid = 0;
12375         key.type = BTRFS_CHUNK_ITEM_KEY;
12376         key.offset = 0;
12377         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12378         if (ret < 0) {
12379                 btrfs_release_path(&path);
12380                 return ret;
12381         }
12382
12383         /*
12384          * We do this in case the block groups were screwed up and had alloc
12385          * bits that aren't actually set on the chunks.  This happens with
12386          * restored images every time and could happen in real life I guess.
12387          */
12388         fs_info->avail_data_alloc_bits = 0;
12389         fs_info->avail_metadata_alloc_bits = 0;
12390         fs_info->avail_system_alloc_bits = 0;
12391
12392         /* First we need to create the in-memory block groups */
12393         while (1) {
12394                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12395                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12396                         if (ret < 0) {
12397                                 btrfs_release_path(&path);
12398                                 return ret;
12399                         }
12400                         if (ret) {
12401                                 ret = 0;
12402                                 break;
12403                         }
12404                 }
12405                 leaf = path.nodes[0];
12406                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12407                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12408                         path.slots[0]++;
12409                         continue;
12410                 }
12411
12412                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12413                 btrfs_add_block_group(fs_info, 0,
12414                                       btrfs_chunk_type(leaf, chunk),
12415                                       key.objectid, key.offset,
12416                                       btrfs_chunk_length(leaf, chunk));
12417                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12418                                  key.offset + btrfs_chunk_length(leaf, chunk));
12419                 path.slots[0]++;
12420         }
12421         start = 0;
12422         while (1) {
12423                 cache = btrfs_lookup_first_block_group(fs_info, start);
12424                 if (!cache)
12425                         break;
12426                 cache->cached = 1;
12427                 start = cache->key.objectid + cache->key.offset;
12428         }
12429
12430         btrfs_release_path(&path);
12431         return 0;
12432 }
12433
12434 static int reset_balance(struct btrfs_trans_handle *trans,
12435                          struct btrfs_fs_info *fs_info)
12436 {
12437         struct btrfs_root *root = fs_info->tree_root;
12438         struct btrfs_path path;
12439         struct extent_buffer *leaf;
12440         struct btrfs_key key;
12441         int del_slot, del_nr = 0;
12442         int ret;
12443         int found = 0;
12444
12445         btrfs_init_path(&path);
12446         key.objectid = BTRFS_BALANCE_OBJECTID;
12447         key.type = BTRFS_BALANCE_ITEM_KEY;
12448         key.offset = 0;
12449         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12450         if (ret) {
12451                 if (ret > 0)
12452                         ret = 0;
12453                 if (!ret)
12454                         goto reinit_data_reloc;
12455                 else
12456                         goto out;
12457         }
12458
12459         ret = btrfs_del_item(trans, root, &path);
12460         if (ret)
12461                 goto out;
12462         btrfs_release_path(&path);
12463
12464         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12465         key.type = BTRFS_ROOT_ITEM_KEY;
12466         key.offset = 0;
12467         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12468         if (ret < 0)
12469                 goto out;
12470         while (1) {
12471                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12472                         if (!found)
12473                                 break;
12474
12475                         if (del_nr) {
12476                                 ret = btrfs_del_items(trans, root, &path,
12477                                                       del_slot, del_nr);
12478                                 del_nr = 0;
12479                                 if (ret)
12480                                         goto out;
12481                         }
12482                         key.offset++;
12483                         btrfs_release_path(&path);
12484
12485                         found = 0;
12486                         ret = btrfs_search_slot(trans, root, &key, &path,
12487                                                 -1, 1);
12488                         if (ret < 0)
12489                                 goto out;
12490                         continue;
12491                 }
12492                 found = 1;
12493                 leaf = path.nodes[0];
12494                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12495                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12496                         break;
12497                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12498                         path.slots[0]++;
12499                         continue;
12500                 }
12501                 if (!del_nr) {
12502                         del_slot = path.slots[0];
12503                         del_nr = 1;
12504                 } else {
12505                         del_nr++;
12506                 }
12507                 path.slots[0]++;
12508         }
12509
12510         if (del_nr) {
12511                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12512                 if (ret)
12513                         goto out;
12514         }
12515         btrfs_release_path(&path);
12516
12517 reinit_data_reloc:
12518         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12519         key.type = BTRFS_ROOT_ITEM_KEY;
12520         key.offset = (u64)-1;
12521         root = btrfs_read_fs_root(fs_info, &key);
12522         if (IS_ERR(root)) {
12523                 fprintf(stderr, "Error reading data reloc tree\n");
12524                 ret = PTR_ERR(root);
12525                 goto out;
12526         }
12527         record_root_in_trans(trans, root);
12528         ret = btrfs_fsck_reinit_root(trans, root, 0);
12529         if (ret)
12530                 goto out;
12531         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12532 out:
12533         btrfs_release_path(&path);
12534         return ret;
12535 }
12536
12537 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12538                               struct btrfs_fs_info *fs_info)
12539 {
12540         u64 start = 0;
12541         int ret;
12542
12543         /*
12544          * The only reason we don't do this is because right now we're just
12545          * walking the trees we find and pinning down their bytes, we don't look
12546          * at any of the leaves.  In order to do mixed groups we'd have to check
12547          * the leaves of any fs roots and pin down the bytes for any file
12548          * extents we find.  Not hard but why do it if we don't have to?
12549          */
12550         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12551                 fprintf(stderr, "We don't support re-initing the extent tree "
12552                         "for mixed block groups yet, please notify a btrfs "
12553                         "developer you want to do this so they can add this "
12554                         "functionality.\n");
12555                 return -EINVAL;
12556         }
12557
12558         /*
12559          * first we need to walk all of the trees except the extent tree and pin
12560          * down the bytes that are in use so we don't overwrite any existing
12561          * metadata.
12562          */
12563         ret = pin_metadata_blocks(fs_info);
12564         if (ret) {
12565                 fprintf(stderr, "error pinning down used bytes\n");
12566                 return ret;
12567         }
12568
12569         /*
12570          * Need to drop all the block groups since we're going to recreate all
12571          * of them again.
12572          */
12573         btrfs_free_block_groups(fs_info);
12574         ret = reset_block_groups(fs_info);
12575         if (ret) {
12576                 fprintf(stderr, "error resetting the block groups\n");
12577                 return ret;
12578         }
12579
12580         /* Ok we can allocate now, reinit the extent root */
12581         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12582         if (ret) {
12583                 fprintf(stderr, "extent root initialization failed\n");
12584                 /*
12585                  * When the transaction code is updated we should end the
12586                  * transaction, but for now progs only knows about commit so
12587                  * just return an error.
12588                  */
12589                 return ret;
12590         }
12591
12592         /*
12593          * Now we have all the in-memory block groups setup so we can make
12594          * allocations properly, and the metadata we care about is safe since we
12595          * pinned all of it above.
12596          */
12597         while (1) {
12598                 struct btrfs_block_group_cache *cache;
12599
12600                 cache = btrfs_lookup_first_block_group(fs_info, start);
12601                 if (!cache)
12602                         break;
12603                 start = cache->key.objectid + cache->key.offset;
12604                 ret = btrfs_insert_item(trans, fs_info->extent_root,
12605                                         &cache->key, &cache->item,
12606                                         sizeof(cache->item));
12607                 if (ret) {
12608                         fprintf(stderr, "Error adding block group\n");
12609                         return ret;
12610                 }
12611                 btrfs_extent_post_op(trans, fs_info->extent_root);
12612         }
12613
12614         ret = reset_balance(trans, fs_info);
12615         if (ret)
12616                 fprintf(stderr, "error resetting the pending balance\n");
12617
12618         return ret;
12619 }
12620
12621 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12622 {
12623         struct btrfs_path path;
12624         struct btrfs_trans_handle *trans;
12625         struct btrfs_key key;
12626         int ret;
12627
12628         printf("Recowing metadata block %llu\n", eb->start);
12629         key.objectid = btrfs_header_owner(eb);
12630         key.type = BTRFS_ROOT_ITEM_KEY;
12631         key.offset = (u64)-1;
12632
12633         root = btrfs_read_fs_root(root->fs_info, &key);
12634         if (IS_ERR(root)) {
12635                 fprintf(stderr, "Couldn't find owner root %llu\n",
12636                         key.objectid);
12637                 return PTR_ERR(root);
12638         }
12639
12640         trans = btrfs_start_transaction(root, 1);
12641         if (IS_ERR(trans))
12642                 return PTR_ERR(trans);
12643
12644         btrfs_init_path(&path);
12645         path.lowest_level = btrfs_header_level(eb);
12646         if (path.lowest_level)
12647                 btrfs_node_key_to_cpu(eb, &key, 0);
12648         else
12649                 btrfs_item_key_to_cpu(eb, &key, 0);
12650
12651         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12652         btrfs_commit_transaction(trans, root);
12653         btrfs_release_path(&path);
12654         return ret;
12655 }
12656
12657 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12658 {
12659         struct btrfs_path path;
12660         struct btrfs_trans_handle *trans;
12661         struct btrfs_key key;
12662         int ret;
12663
12664         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12665                bad->key.type, bad->key.offset);
12666         key.objectid = bad->root_id;
12667         key.type = BTRFS_ROOT_ITEM_KEY;
12668         key.offset = (u64)-1;
12669
12670         root = btrfs_read_fs_root(root->fs_info, &key);
12671         if (IS_ERR(root)) {
12672                 fprintf(stderr, "Couldn't find owner root %llu\n",
12673                         key.objectid);
12674                 return PTR_ERR(root);
12675         }
12676
12677         trans = btrfs_start_transaction(root, 1);
12678         if (IS_ERR(trans))
12679                 return PTR_ERR(trans);
12680
12681         btrfs_init_path(&path);
12682         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12683         if (ret) {
12684                 if (ret > 0)
12685                         ret = 0;
12686                 goto out;
12687         }
12688         ret = btrfs_del_item(trans, root, &path);
12689 out:
12690         btrfs_commit_transaction(trans, root);
12691         btrfs_release_path(&path);
12692         return ret;
12693 }
12694
12695 static int zero_log_tree(struct btrfs_root *root)
12696 {
12697         struct btrfs_trans_handle *trans;
12698         int ret;
12699
12700         trans = btrfs_start_transaction(root, 1);
12701         if (IS_ERR(trans)) {
12702                 ret = PTR_ERR(trans);
12703                 return ret;
12704         }
12705         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12706         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12707         ret = btrfs_commit_transaction(trans, root);
12708         return ret;
12709 }
12710
12711 static int populate_csum(struct btrfs_trans_handle *trans,
12712                          struct btrfs_root *csum_root, char *buf, u64 start,
12713                          u64 len)
12714 {
12715         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12716         u64 offset = 0;
12717         u64 sectorsize;
12718         int ret = 0;
12719
12720         while (offset < len) {
12721                 sectorsize = fs_info->sectorsize;
12722                 ret = read_extent_data(fs_info, buf, start + offset,
12723                                        &sectorsize, 0);
12724                 if (ret)
12725                         break;
12726                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12727                                             start + offset, buf, sectorsize);
12728                 if (ret)
12729                         break;
12730                 offset += sectorsize;
12731         }
12732         return ret;
12733 }
12734
12735 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12736                                       struct btrfs_root *csum_root,
12737                                       struct btrfs_root *cur_root)
12738 {
12739         struct btrfs_path path;
12740         struct btrfs_key key;
12741         struct extent_buffer *node;
12742         struct btrfs_file_extent_item *fi;
12743         char *buf = NULL;
12744         u64 start = 0;
12745         u64 len = 0;
12746         int slot = 0;
12747         int ret = 0;
12748
12749         buf = malloc(cur_root->fs_info->sectorsize);
12750         if (!buf)
12751                 return -ENOMEM;
12752
12753         btrfs_init_path(&path);
12754         key.objectid = 0;
12755         key.offset = 0;
12756         key.type = 0;
12757         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12758         if (ret < 0)
12759                 goto out;
12760         /* Iterate all regular file extents and fill its csum */
12761         while (1) {
12762                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12763
12764                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12765                         goto next;
12766                 node = path.nodes[0];
12767                 slot = path.slots[0];
12768                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12769                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12770                         goto next;
12771                 start = btrfs_file_extent_disk_bytenr(node, fi);
12772                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12773
12774                 ret = populate_csum(trans, csum_root, buf, start, len);
12775                 if (ret == -EEXIST)
12776                         ret = 0;
12777                 if (ret < 0)
12778                         goto out;
12779 next:
12780                 /*
12781                  * TODO: if next leaf is corrupted, jump to nearest next valid
12782                  * leaf.
12783                  */
12784                 ret = btrfs_next_item(cur_root, &path);
12785                 if (ret < 0)
12786                         goto out;
12787                 if (ret > 0) {
12788                         ret = 0;
12789                         goto out;
12790                 }
12791         }
12792
12793 out:
12794         btrfs_release_path(&path);
12795         free(buf);
12796         return ret;
12797 }
12798
12799 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12800                                   struct btrfs_root *csum_root)
12801 {
12802         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12803         struct btrfs_path path;
12804         struct btrfs_root *tree_root = fs_info->tree_root;
12805         struct btrfs_root *cur_root;
12806         struct extent_buffer *node;
12807         struct btrfs_key key;
12808         int slot = 0;
12809         int ret = 0;
12810
12811         btrfs_init_path(&path);
12812         key.objectid = BTRFS_FS_TREE_OBJECTID;
12813         key.offset = 0;
12814         key.type = BTRFS_ROOT_ITEM_KEY;
12815         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12816         if (ret < 0)
12817                 goto out;
12818         if (ret > 0) {
12819                 ret = -ENOENT;
12820                 goto out;
12821         }
12822
12823         while (1) {
12824                 node = path.nodes[0];
12825                 slot = path.slots[0];
12826                 btrfs_item_key_to_cpu(node, &key, slot);
12827                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12828                         goto out;
12829                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12830                         goto next;
12831                 if (!is_fstree(key.objectid))
12832                         goto next;
12833                 key.offset = (u64)-1;
12834
12835                 cur_root = btrfs_read_fs_root(fs_info, &key);
12836                 if (IS_ERR(cur_root) || !cur_root) {
12837                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12838                                 key.objectid);
12839                         goto out;
12840                 }
12841                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12842                                 cur_root);
12843                 if (ret < 0)
12844                         goto out;
12845 next:
12846                 ret = btrfs_next_item(tree_root, &path);
12847                 if (ret > 0) {
12848                         ret = 0;
12849                         goto out;
12850                 }
12851                 if (ret < 0)
12852                         goto out;
12853         }
12854
12855 out:
12856         btrfs_release_path(&path);
12857         return ret;
12858 }
12859
12860 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12861                                       struct btrfs_root *csum_root)
12862 {
12863         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12864         struct btrfs_path path;
12865         struct btrfs_extent_item *ei;
12866         struct extent_buffer *leaf;
12867         char *buf;
12868         struct btrfs_key key;
12869         int ret;
12870
12871         btrfs_init_path(&path);
12872         key.objectid = 0;
12873         key.type = BTRFS_EXTENT_ITEM_KEY;
12874         key.offset = 0;
12875         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12876         if (ret < 0) {
12877                 btrfs_release_path(&path);
12878                 return ret;
12879         }
12880
12881         buf = malloc(csum_root->fs_info->sectorsize);
12882         if (!buf) {
12883                 btrfs_release_path(&path);
12884                 return -ENOMEM;
12885         }
12886
12887         while (1) {
12888                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12889                         ret = btrfs_next_leaf(extent_root, &path);
12890                         if (ret < 0)
12891                                 break;
12892                         if (ret) {
12893                                 ret = 0;
12894                                 break;
12895                         }
12896                 }
12897                 leaf = path.nodes[0];
12898
12899                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12900                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12901                         path.slots[0]++;
12902                         continue;
12903                 }
12904
12905                 ei = btrfs_item_ptr(leaf, path.slots[0],
12906                                     struct btrfs_extent_item);
12907                 if (!(btrfs_extent_flags(leaf, ei) &
12908                       BTRFS_EXTENT_FLAG_DATA)) {
12909                         path.slots[0]++;
12910                         continue;
12911                 }
12912
12913                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12914                                     key.offset);
12915                 if (ret)
12916                         break;
12917                 path.slots[0]++;
12918         }
12919
12920         btrfs_release_path(&path);
12921         free(buf);
12922         return ret;
12923 }
12924
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * An extent tree init wipes out all the extent info, so in that case the
 * extent tree cannot be used as the source.  When @search_fs_tree is non-zero
 * the fs/subvolume trees are walked instead; otherwise the extent tree is
 * used.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
12941
12942 static void free_roots_info_cache(void)
12943 {
12944         if (!roots_info_cache)
12945                 return;
12946
12947         while (!cache_tree_empty(roots_info_cache)) {
12948                 struct cache_extent *entry;
12949                 struct root_item_info *rii;
12950
12951                 entry = first_cache_extent(roots_info_cache);
12952                 if (!entry)
12953                         break;
12954                 remove_cache_extent(roots_info_cache, entry);
12955                 rii = container_of(entry, struct root_item_info, cache_extent);
12956                 free(rii);
12957         }
12958
12959         free(roots_info_cache);
12960         roots_info_cache = NULL;
12961 }
12962
12963 static int build_roots_info_cache(struct btrfs_fs_info *info)
12964 {
12965         int ret = 0;
12966         struct btrfs_key key;
12967         struct extent_buffer *leaf;
12968         struct btrfs_path path;
12969
12970         if (!roots_info_cache) {
12971                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12972                 if (!roots_info_cache)
12973                         return -ENOMEM;
12974                 cache_tree_init(roots_info_cache);
12975         }
12976
12977         btrfs_init_path(&path);
12978         key.objectid = 0;
12979         key.type = BTRFS_EXTENT_ITEM_KEY;
12980         key.offset = 0;
12981         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12982         if (ret < 0)
12983                 goto out;
12984         leaf = path.nodes[0];
12985
12986         while (1) {
12987                 struct btrfs_key found_key;
12988                 struct btrfs_extent_item *ei;
12989                 struct btrfs_extent_inline_ref *iref;
12990                 int slot = path.slots[0];
12991                 int type;
12992                 u64 flags;
12993                 u64 root_id;
12994                 u8 level;
12995                 struct cache_extent *entry;
12996                 struct root_item_info *rii;
12997
12998                 if (slot >= btrfs_header_nritems(leaf)) {
12999                         ret = btrfs_next_leaf(info->extent_root, &path);
13000                         if (ret < 0) {
13001                                 break;
13002                         } else if (ret) {
13003                                 ret = 0;
13004                                 break;
13005                         }
13006                         leaf = path.nodes[0];
13007                         slot = path.slots[0];
13008                 }
13009
13010                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13011
13012                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13013                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13014                         goto next;
13015
13016                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13017                 flags = btrfs_extent_flags(leaf, ei);
13018
13019                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13020                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13021                         goto next;
13022
13023                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13024                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13025                         level = found_key.offset;
13026                 } else {
13027                         struct btrfs_tree_block_info *binfo;
13028
13029                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13030                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13031                         level = btrfs_tree_block_level(leaf, binfo);
13032                 }
13033
13034                 /*
13035                  * For a root extent, it must be of the following type and the
13036                  * first (and only one) iref in the item.
13037                  */
13038                 type = btrfs_extent_inline_ref_type(leaf, iref);
13039                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13040                         goto next;
13041
13042                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13043                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13044                 if (!entry) {
13045                         rii = malloc(sizeof(struct root_item_info));
13046                         if (!rii) {
13047                                 ret = -ENOMEM;
13048                                 goto out;
13049                         }
13050                         rii->cache_extent.start = root_id;
13051                         rii->cache_extent.size = 1;
13052                         rii->level = (u8)-1;
13053                         entry = &rii->cache_extent;
13054                         ret = insert_cache_extent(roots_info_cache, entry);
13055                         ASSERT(ret == 0);
13056                 } else {
13057                         rii = container_of(entry, struct root_item_info,
13058                                            cache_extent);
13059                 }
13060
13061                 ASSERT(rii->cache_extent.start == root_id);
13062                 ASSERT(rii->cache_extent.size == 1);
13063
13064                 if (level > rii->level || rii->level == (u8)-1) {
13065                         rii->level = level;
13066                         rii->bytenr = found_key.objectid;
13067                         rii->gen = btrfs_extent_generation(leaf, ei);
13068                         rii->node_count = 1;
13069                 } else if (level == rii->level) {
13070                         rii->node_count++;
13071                 }
13072 next:
13073                 path.slots[0]++;
13074         }
13075
13076 out:
13077         btrfs_release_path(&path);
13078
13079         return ret;
13080 }
13081
13082 static int maybe_repair_root_item(struct btrfs_path *path,
13083                                   const struct btrfs_key *root_key,
13084                                   const int read_only_mode)
13085 {
13086         const u64 root_id = root_key->objectid;
13087         struct cache_extent *entry;
13088         struct root_item_info *rii;
13089         struct btrfs_root_item ri;
13090         unsigned long offset;
13091
13092         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13093         if (!entry) {
13094                 fprintf(stderr,
13095                         "Error: could not find extent items for root %llu\n",
13096                         root_key->objectid);
13097                 return -ENOENT;
13098         }
13099
13100         rii = container_of(entry, struct root_item_info, cache_extent);
13101         ASSERT(rii->cache_extent.start == root_id);
13102         ASSERT(rii->cache_extent.size == 1);
13103
13104         if (rii->node_count != 1) {
13105                 fprintf(stderr,
13106                         "Error: could not find btree root extent for root %llu\n",
13107                         root_id);
13108                 return -ENOENT;
13109         }
13110
13111         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13112         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13113
13114         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13115             btrfs_root_level(&ri) != rii->level ||
13116             btrfs_root_generation(&ri) != rii->gen) {
13117
13118                 /*
13119                  * If we're in repair mode but our caller told us to not update
13120                  * the root item, i.e. just check if it needs to be updated, don't
13121                  * print this message, since the caller will call us again shortly
13122                  * for the same root item without read only mode (the caller will
13123                  * open a transaction first).
13124                  */
13125                 if (!(read_only_mode && repair))
13126                         fprintf(stderr,
13127                                 "%sroot item for root %llu,"
13128                                 " current bytenr %llu, current gen %llu, current level %u,"
13129                                 " new bytenr %llu, new gen %llu, new level %u\n",
13130                                 (read_only_mode ? "" : "fixing "),
13131                                 root_id,
13132                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13133                                 btrfs_root_level(&ri),
13134                                 rii->bytenr, rii->gen, rii->level);
13135
13136                 if (btrfs_root_generation(&ri) > rii->gen) {
13137                         fprintf(stderr,
13138                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13139                                 root_id, btrfs_root_generation(&ri), rii->gen);
13140                         return -EINVAL;
13141                 }
13142
13143                 if (!read_only_mode) {
13144                         btrfs_set_root_bytenr(&ri, rii->bytenr);
13145                         btrfs_set_root_level(&ri, rii->level);
13146                         btrfs_set_root_generation(&ri, rii->gen);
13147                         write_extent_buffer(path->nodes[0], &ri,
13148                                             offset, sizeof(ri));
13149                 }
13150
13151                 return 1;
13152         }
13153
13154         return 0;
13155 }
13156
13157 /*
13158  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13159  * caused read-only snapshots to be corrupted if they were created at a moment
13160  * when the source subvolume/snapshot had orphan items. The issue was that the
13161  * on-disk root items became incorrect, referring to the pre orphan cleanup root
13162  * node instead of the post orphan cleanup root node.
13163  * So this function, and its callees, just detects and fixes those cases. Even
13164  * though the regression was for read-only snapshots, this function applies to
13165  * any snapshot/subvolume root.
13166  * This must be run before any other repair code - not doing it so, makes other
13167  * repair code delete or modify backrefs in the extent tree for example, which
13168  * will result in an inconsistent fs after repairing the root items.
13169  */
13170 static int repair_root_items(struct btrfs_fs_info *info)
13171 {
13172         struct btrfs_path path;
13173         struct btrfs_key key;
13174         struct extent_buffer *leaf;
13175         struct btrfs_trans_handle *trans = NULL;
13176         int ret = 0;
13177         int bad_roots = 0;
13178         int need_trans = 0;
13179
13180         btrfs_init_path(&path);
13181
13182         ret = build_roots_info_cache(info);
13183         if (ret)
13184                 goto out;
13185
13186         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13187         key.type = BTRFS_ROOT_ITEM_KEY;
13188         key.offset = 0;
13189
13190 again:
13191         /*
13192          * Avoid opening and committing transactions if a leaf doesn't have
13193          * any root items that need to be fixed, so that we avoid rotating
13194          * backup roots unnecessarily.
13195          */
13196         if (need_trans) {
13197                 trans = btrfs_start_transaction(info->tree_root, 1);
13198                 if (IS_ERR(trans)) {
13199                         ret = PTR_ERR(trans);
13200                         goto out;
13201                 }
13202         }
13203
13204         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13205                                 0, trans ? 1 : 0);
13206         if (ret < 0)
13207                 goto out;
13208         leaf = path.nodes[0];
13209
13210         while (1) {
13211                 struct btrfs_key found_key;
13212
13213                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13214                         int no_more_keys = find_next_key(&path, &key);
13215
13216                         btrfs_release_path(&path);
13217                         if (trans) {
13218                                 ret = btrfs_commit_transaction(trans,
13219                                                                info->tree_root);
13220                                 trans = NULL;
13221                                 if (ret < 0)
13222                                         goto out;
13223                         }
13224                         need_trans = 0;
13225                         if (no_more_keys)
13226                                 break;
13227                         goto again;
13228                 }
13229
13230                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13231
13232                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13233                         goto next;
13234                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13235                         goto next;
13236
13237                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13238                 if (ret < 0)
13239                         goto out;
13240                 if (ret) {
13241                         if (!trans && repair) {
13242                                 need_trans = 1;
13243                                 key = found_key;
13244                                 btrfs_release_path(&path);
13245                                 goto again;
13246                         }
13247                         bad_roots++;
13248                 }
13249 next:
13250                 path.slots[0]++;
13251         }
13252         ret = 0;
13253 out:
13254         free_roots_info_cache();
13255         btrfs_release_path(&path);
13256         if (trans)
13257                 btrfs_commit_transaction(trans, info->tree_root);
13258         if (ret < 0)
13259                 return ret;
13260
13261         return bad_roots;
13262 }
13263
13264 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13265 {
13266         struct btrfs_trans_handle *trans;
13267         struct btrfs_block_group_cache *bg_cache;
13268         u64 current = 0;
13269         int ret = 0;
13270
13271         /* Clear all free space cache inodes and its extent data */
13272         while (1) {
13273                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13274                 if (!bg_cache)
13275                         break;
13276                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13277                 if (ret < 0)
13278                         return ret;
13279                 current = bg_cache->key.objectid + bg_cache->key.offset;
13280         }
13281
13282         /* Don't forget to set cache_generation to -1 */
13283         trans = btrfs_start_transaction(fs_info->tree_root, 0);
13284         if (IS_ERR(trans)) {
13285                 error("failed to update super block cache generation");
13286                 return PTR_ERR(trans);
13287         }
13288         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13289         btrfs_commit_transaction(trans, fs_info->tree_root);
13290
13291         return ret;
13292 }
13293
13294 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13295                 int clear_version)
13296 {
13297         int ret = 0;
13298
13299         if (clear_version == 1) {
13300                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13301                         error(
13302                 "free space cache v2 detected, use --clear-space-cache v2");
13303                         ret = 1;
13304                         goto close_out;
13305                 }
13306                 printf("Clearing free space cache\n");
13307                 ret = clear_free_space_cache(fs_info);
13308                 if (ret) {
13309                         error("failed to clear free space cache");
13310                         ret = 1;
13311                 } else {
13312                         printf("Free space cache cleared\n");
13313                 }
13314         } else if (clear_version == 2) {
13315                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13316                         printf("no free space cache v2 to clear\n");
13317                         ret = 0;
13318                         goto close_out;
13319                 }
13320                 printf("Clear free space cache v2\n");
13321                 ret = btrfs_clear_free_space_tree(fs_info);
13322                 if (ret) {
13323                         error("failed to clear free space cache v2: %d", ret);
13324                         ret = 1;
13325                 } else {
13326                         printf("free space cache v2 cleared\n");
13327                 }
13328         }
13329 close_out:
13330         return ret;
13331 }
13332
/*
 * Help text for "btrfs check": a NULL-terminated list of lines that
 * cmd_check() passes to usage().  The first entry is the synopsis, the
 * entries up to the empty string are descriptive text, and the remaining
 * entries describe the options.
 */
const char *const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
13364
13365 int cmd_check(int argc, char **argv)
13366 {
13367         struct cache_tree root_cache;
13368         struct btrfs_root *root;
13369         struct btrfs_fs_info *info;
13370         u64 bytenr = 0;
13371         u64 subvolid = 0;
13372         u64 tree_root_bytenr = 0;
13373         u64 chunk_root_bytenr = 0;
13374         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13375         int ret = 0;
13376         int err = 0;
13377         u64 num;
13378         int init_csum_tree = 0;
13379         int readonly = 0;
13380         int clear_space_cache = 0;
13381         int qgroup_report = 0;
13382         int qgroups_repaired = 0;
13383         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13384         int force = 0;
13385
13386         while(1) {
13387                 int c;
13388                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13389                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13390                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13391                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13392                         GETOPT_VAL_FORCE };
13393                 static const struct option long_options[] = {
13394                         { "super", required_argument, NULL, 's' },
13395                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13396                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13397                         { "init-csum-tree", no_argument, NULL,
13398                                 GETOPT_VAL_INIT_CSUM },
13399                         { "init-extent-tree", no_argument, NULL,
13400                                 GETOPT_VAL_INIT_EXTENT },
13401                         { "check-data-csum", no_argument, NULL,
13402                                 GETOPT_VAL_CHECK_CSUM },
13403                         { "backup", no_argument, NULL, 'b' },
13404                         { "subvol-extents", required_argument, NULL, 'E' },
13405                         { "qgroup-report", no_argument, NULL, 'Q' },
13406                         { "tree-root", required_argument, NULL, 'r' },
13407                         { "chunk-root", required_argument, NULL,
13408                                 GETOPT_VAL_CHUNK_TREE },
13409                         { "progress", no_argument, NULL, 'p' },
13410                         { "mode", required_argument, NULL,
13411                                 GETOPT_VAL_MODE },
13412                         { "clear-space-cache", required_argument, NULL,
13413                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
13414                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13415                         { NULL, 0, NULL, 0}
13416                 };
13417
13418                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13419                 if (c < 0)
13420                         break;
13421                 switch(c) {
13422                         case 'a': /* ignored */ break;
13423                         case 'b':
13424                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13425                                 break;
13426                         case 's':
13427                                 num = arg_strtou64(optarg);
13428                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13429                                         error(
13430                                         "super mirror should be less than %d",
13431                                                 BTRFS_SUPER_MIRROR_MAX);
13432                                         exit(1);
13433                                 }
13434                                 bytenr = btrfs_sb_offset(((int)num));
13435                                 printf("using SB copy %llu, bytenr %llu\n", num,
13436                                        (unsigned long long)bytenr);
13437                                 break;
13438                         case 'Q':
13439                                 qgroup_report = 1;
13440                                 break;
13441                         case 'E':
13442                                 subvolid = arg_strtou64(optarg);
13443                                 break;
13444                         case 'r':
13445                                 tree_root_bytenr = arg_strtou64(optarg);
13446                                 break;
13447                         case GETOPT_VAL_CHUNK_TREE:
13448                                 chunk_root_bytenr = arg_strtou64(optarg);
13449                                 break;
13450                         case 'p':
13451                                 ctx.progress_enabled = true;
13452                                 break;
13453                         case '?':
13454                         case 'h':
13455                                 usage(cmd_check_usage);
13456                         case GETOPT_VAL_REPAIR:
13457                                 printf("enabling repair mode\n");
13458                                 repair = 1;
13459                                 ctree_flags |= OPEN_CTREE_WRITES;
13460                                 break;
13461                         case GETOPT_VAL_READONLY:
13462                                 readonly = 1;
13463                                 break;
13464                         case GETOPT_VAL_INIT_CSUM:
13465                                 printf("Creating a new CRC tree\n");
13466                                 init_csum_tree = 1;
13467                                 repair = 1;
13468                                 ctree_flags |= OPEN_CTREE_WRITES;
13469                                 break;
13470                         case GETOPT_VAL_INIT_EXTENT:
13471                                 init_extent_tree = 1;
13472                                 ctree_flags |= (OPEN_CTREE_WRITES |
13473                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
13474                                 repair = 1;
13475                                 break;
13476                         case GETOPT_VAL_CHECK_CSUM:
13477                                 check_data_csum = 1;
13478                                 break;
13479                         case GETOPT_VAL_MODE:
13480                                 check_mode = parse_check_mode(optarg);
13481                                 if (check_mode == CHECK_MODE_UNKNOWN) {
13482                                         error("unknown mode: %s", optarg);
13483                                         exit(1);
13484                                 }
13485                                 break;
13486                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
13487                                 if (strcmp(optarg, "v1") == 0) {
13488                                         clear_space_cache = 1;
13489                                 } else if (strcmp(optarg, "v2") == 0) {
13490                                         clear_space_cache = 2;
13491                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13492                                 } else {
13493                                         error(
13494                 "invalid argument to --clear-space-cache, must be v1 or v2");
13495                                         exit(1);
13496                                 }
13497                                 ctree_flags |= OPEN_CTREE_WRITES;
13498                                 break;
13499                         case GETOPT_VAL_FORCE:
13500                                 force = 1;
13501                                 break;
13502                 }
13503         }
13504
13505         if (check_argc_exact(argc - optind, 1))
13506                 usage(cmd_check_usage);
13507
13508         if (ctx.progress_enabled) {
13509                 ctx.tp = TASK_NOTHING;
13510                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13511         }
13512
13513         /* This check is the only reason for --readonly to exist */
13514         if (readonly && repair) {
13515                 error("repair options are not compatible with --readonly");
13516                 exit(1);
13517         }
13518
13519         /*
13520          * experimental and dangerous
13521          */
13522         if (repair && check_mode == CHECK_MODE_LOWMEM)
13523                 warning("low-memory mode repair support is only partial");
13524
13525         radix_tree_init();
13526         cache_tree_init(&root_cache);
13527
13528         ret = check_mounted(argv[optind]);
13529         if (!force) {
13530                 if (ret < 0) {
13531                         error("could not check mount status: %s",
13532                                         strerror(-ret));
13533                         err |= !!ret;
13534                         goto err_out;
13535                 } else if (ret) {
13536                         error(
13537 "%s is currently mounted, use --force if you really intend to check the filesystem",
13538                                 argv[optind]);
13539                         ret = -EBUSY;
13540                         err |= !!ret;
13541                         goto err_out;
13542                 }
13543         } else {
13544                 if (repair) {
13545                         error("repair and --force is not yet supported");
13546                         ret = 1;
13547                         err |= !!ret;
13548                         goto err_out;
13549                 }
13550                 if (ret < 0) {
13551                         warning(
13552 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13553                                 argv[optind]);
13554                 } else if (ret) {
13555                         warning(
13556                         "filesystem mounted, continuing because of --force");
13557                 }
13558                 /* A block device is mounted in exclusive mode by kernel */
13559                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13560         }
13561
13562         /* only allow partial opening under repair mode */
13563         if (repair)
13564                 ctree_flags |= OPEN_CTREE_PARTIAL;
13565
13566         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13567                                   chunk_root_bytenr, ctree_flags);
13568         if (!info) {
13569                 error("cannot open file system");
13570                 ret = -EIO;
13571                 err |= !!ret;
13572                 goto err_out;
13573         }
13574
13575         global_info = info;
13576         root = info->fs_root;
13577         uuid_unparse(info->super_copy->fsid, uuidbuf);
13578
13579         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13580
13581         /*
13582          * Check the bare minimum before starting anything else that could rely
13583          * on it, namely the tree roots, any local consistency checks
13584          */
13585         if (!extent_buffer_uptodate(info->tree_root->node) ||
13586             !extent_buffer_uptodate(info->dev_root->node) ||
13587             !extent_buffer_uptodate(info->chunk_root->node)) {
13588                 error("critical roots corrupted, unable to check the filesystem");
13589                 err |= !!ret;
13590                 ret = -EIO;
13591                 goto close_out;
13592         }
13593
13594         if (clear_space_cache) {
13595                 ret = do_clear_free_space_cache(info, clear_space_cache);
13596                 err |= !!ret;
13597                 goto close_out;
13598         }
13599
13600         /*
13601          * repair mode will force us to commit transaction which
13602          * will make us fail to load log tree when mounting.
13603          */
13604         if (repair && btrfs_super_log_root(info->super_copy)) {
13605                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13606                 if (!ret) {
13607                         ret = 1;
13608                         err |= !!ret;
13609                         goto close_out;
13610                 }
13611                 ret = zero_log_tree(root);
13612                 err |= !!ret;
13613                 if (ret) {
13614                         error("failed to zero log tree: %d", ret);
13615                         goto close_out;
13616                 }
13617         }
13618
13619         if (qgroup_report) {
13620                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13621                        uuidbuf);
13622                 ret = qgroup_verify_all(info);
13623                 err |= !!ret;
13624                 if (ret == 0)
13625                         report_qgroups(1);
13626                 goto close_out;
13627         }
13628         if (subvolid) {
13629                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13630                        subvolid, argv[optind], uuidbuf);
13631                 ret = print_extent_state(info, subvolid);
13632                 err |= !!ret;
13633                 goto close_out;
13634         }
13635
13636         if (init_extent_tree || init_csum_tree) {
13637                 struct btrfs_trans_handle *trans;
13638
13639                 trans = btrfs_start_transaction(info->extent_root, 0);
13640                 if (IS_ERR(trans)) {
13641                         error("error starting transaction");
13642                         ret = PTR_ERR(trans);
13643                         err |= !!ret;
13644                         goto close_out;
13645                 }
13646
13647                 if (init_extent_tree) {
13648                         printf("Creating a new extent tree\n");
13649                         ret = reinit_extent_tree(trans, info);
13650                         err |= !!ret;
13651                         if (ret)
13652                                 goto close_out;
13653                 }
13654
13655                 if (init_csum_tree) {
13656                         printf("Reinitialize checksum tree\n");
13657                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13658                         if (ret) {
13659                                 error("checksum tree initialization failed: %d",
13660                                                 ret);
13661                                 ret = -EIO;
13662                                 err |= !!ret;
13663                                 goto close_out;
13664                         }
13665
13666                         ret = fill_csum_tree(trans, info->csum_root,
13667                                              init_extent_tree);
13668                         err |= !!ret;
13669                         if (ret) {
13670                                 error("checksum tree refilling failed: %d", ret);
13671                                 return -EIO;
13672                         }
13673                 }
13674                 /*
13675                  * Ok now we commit and run the normal fsck, which will add
13676                  * extent entries for all of the items it finds.
13677                  */
13678                 ret = btrfs_commit_transaction(trans, info->extent_root);
13679                 err |= !!ret;
13680                 if (ret)
13681                         goto close_out;
13682         }
13683         if (!extent_buffer_uptodate(info->extent_root->node)) {
13684                 error("critical: extent_root, unable to check the filesystem");
13685                 ret = -EIO;
13686                 err |= !!ret;
13687                 goto close_out;
13688         }
13689         if (!extent_buffer_uptodate(info->csum_root->node)) {
13690                 error("critical: csum_root, unable to check the filesystem");
13691                 ret = -EIO;
13692                 err |= !!ret;
13693                 goto close_out;
13694         }
13695
13696         ret = do_check_chunks_and_extents(info);
13697         err |= !!ret;
13698         if (ret)
13699                 error(
13700                 "errors found in extent allocation tree or chunk allocation");
13701
13702         ret = repair_root_items(info);
13703         err |= !!ret;
13704         if (ret < 0) {
13705                 error("failed to repair root items: %s", strerror(-ret));
13706                 goto close_out;
13707         }
13708         if (repair) {
13709                 fprintf(stderr, "Fixed %d roots.\n", ret);
13710                 ret = 0;
13711         } else if (ret > 0) {
13712                 fprintf(stderr,
13713                        "Found %d roots with an outdated root item.\n",
13714                        ret);
13715                 fprintf(stderr,
13716                         "Please run a filesystem check with the option --repair to fix them.\n");
13717                 ret = 1;
13718                 err |= !!ret;
13719                 goto close_out;
13720         }
13721
13722         if (!ctx.progress_enabled) {
13723                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13724                         fprintf(stderr, "checking free space tree\n");
13725                 else
13726                         fprintf(stderr, "checking free space cache\n");
13727         }
13728         ret = check_space_cache(root);
13729         err |= !!ret;
13730         if (ret) {
13731                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13732                         error("errors found in free space tree");
13733                 else
13734                         error("errors found in free space cache");
13735                 goto out;
13736         }
13737
13738         /*
13739          * We used to have to have these hole extents in between our real
13740          * extents so if we don't have this flag set we need to make sure there
13741          * are no gaps in the file extents for inodes, otherwise we can just
13742          * ignore it when this happens.
13743          */
13744         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13745         ret = do_check_fs_roots(info, &root_cache);
13746         err |= !!ret;
13747         if (ret) {
13748                 error("errors found in fs roots");
13749                 goto out;
13750         }
13751
13752         fprintf(stderr, "checking csums\n");
13753         ret = check_csums(root);
13754         err |= !!ret;
13755         if (ret) {
13756                 error("errors found in csum tree");
13757                 goto out;
13758         }
13759
13760         fprintf(stderr, "checking root refs\n");
13761         /* For low memory mode, check_fs_roots_v2 handles root refs */
13762         if (check_mode != CHECK_MODE_LOWMEM) {
13763                 ret = check_root_refs(root, &root_cache);
13764                 err |= !!ret;
13765                 if (ret) {
13766                         error("errors found in root refs");
13767                         goto out;
13768                 }
13769         }
13770
13771         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13772                 struct extent_buffer *eb;
13773
13774                 eb = list_first_entry(&root->fs_info->recow_ebs,
13775                                       struct extent_buffer, recow);
13776                 list_del_init(&eb->recow);
13777                 ret = recow_extent_buffer(root, eb);
13778                 err |= !!ret;
13779                 if (ret) {
13780                         error("fails to fix transid errors");
13781                         break;
13782                 }
13783         }
13784
13785         while (!list_empty(&delete_items)) {
13786                 struct bad_item *bad;
13787
13788                 bad = list_first_entry(&delete_items, struct bad_item, list);
13789                 list_del_init(&bad->list);
13790                 if (repair) {
13791                         ret = delete_bad_item(root, bad);
13792                         err |= !!ret;
13793                 }
13794                 free(bad);
13795         }
13796
13797         if (info->quota_enabled) {
13798                 fprintf(stderr, "checking quota groups\n");
13799                 ret = qgroup_verify_all(info);
13800                 err |= !!ret;
13801                 if (ret) {
13802                         error("failed to check quota groups");
13803                         goto out;
13804                 }
13805                 report_qgroups(0);
13806                 ret = repair_qgroups(info, &qgroups_repaired);
13807                 err |= !!ret;
13808                 if (err) {
13809                         error("failed to repair quota groups");
13810                         goto out;
13811                 }
13812                 ret = 0;
13813         }
13814
13815         if (!list_empty(&root->fs_info->recow_ebs)) {
13816                 error("transid errors in file system");
13817                 ret = 1;
13818                 err |= !!ret;
13819         }
13820 out:
13821         printf("found %llu bytes used, ",
13822                (unsigned long long)bytes_used);
13823         if (err)
13824                 printf("error(s) found\n");
13825         else
13826                 printf("no error found\n");
13827         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13828         printf("total tree bytes: %llu\n",
13829                (unsigned long long)total_btree_bytes);
13830         printf("total fs tree bytes: %llu\n",
13831                (unsigned long long)total_fs_tree_bytes);
13832         printf("total extent tree bytes: %llu\n",
13833                (unsigned long long)total_extent_tree_bytes);
13834         printf("btree space waste bytes: %llu\n",
13835                (unsigned long long)btree_space_waste);
13836         printf("file data blocks allocated: %llu\n referenced %llu\n",
13837                 (unsigned long long)data_bytes_allocated,
13838                 (unsigned long long)data_bytes_referenced);
13839
13840         free_qgroup_counts();
13841         free_root_recs_tree(&root_cache);
13842 close_out:
13843         close_ctree(root);
13844 err_out:
13845         if (ctx.progress_enabled)
13846                 task_deinit(ctx.info);
13847
13848         return err;
13849 }