e1fe9a4966be3718cc6a9702160928b8ad96e1ea
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases that the progress indicator can report.  The value is used as an
 * index into the task_position_string[] table in print_status_check(), so
 * the order here must match that table.
 */
47 enum task_position {
48         TASK_EXTENTS,
49         TASK_FREE_SPACE,
50         TASK_FS_ROOTS,
51         TASK_NOTHING, /* must stay the last element; has no string of its own */
52 };
53
/*
 * Context consumed by print_status_check(): `tp` selects the message that is
 * printed and `info` is handed to task_period_start()/task_period_wait().
 */
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
/*
 * File-scope state of the check command.
 * NOTE(review): the u64 variables look like running totals accumulated while
 * the trees are scanned, and the int variables look like option/feature
 * switches - confirm against their users, which are outside this excerpt.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
/* Progress-reporting context; has the type print_status_check() expects. */
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementation selected by the user.  parse_check_mode() maps the
 * strings "lowmem", "orig" and "original" to these values and returns
 * CHECK_MODE_UNKNOWN for anything else.
 */
78 enum btrfs_check_mode {
79         CHECK_MODE_ORIGINAL,
80         CHECK_MODE_LOWMEM,
81         CHECK_MODE_UNKNOWN,
82         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
83 };
84
/* Mode in effect; starts out as the default (original) mode. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header of a backref record.  struct data_backref and
 * struct tree_backref both embed it as their `node` member, and
 * compare_extent_backref() uses is_data/full_backref to order two records
 * before delegating to the type-specific comparator.
 */
87 struct extent_backref {
88         struct rb_node node;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
/*
 * Backref record for a data extent.  The embedded extent_backref has to be
 * the member named `node`, because to_data_backref() recovers this structure
 * from it with container_of().
 */
101 struct data_backref {
102         struct extent_backref node;
/* parent and root share storage; compare_data_backref() compares them as one value */
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
/*
 * Error bit flags, one bit per condition, starting at bit 1 (bit 0 is not
 * used by this set).
 * NOTE(review): presumably the error classification of the low-memory mode
 * fs-tree check - confirm against the users of these flags.
 */
116 #define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM           (1<<14) /* no inode_item */
130 #define LAST_ITEM               (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
133 #define DIR_INDEX_MISSING       (1<<18) /* INODE_INDEX not found */
134 #define DIR_INDEX_MISMATCH      (1<<19) /* INODE_INDEX found but not match */
135
136 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 {
138         return container_of(back, struct data_backref, node);
139 }
140
141 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
142 {
143         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
144         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
145         struct data_backref *back1 = to_data_backref(ext1);
146         struct data_backref *back2 = to_data_backref(ext2);
147
148         WARN_ON(!ext1->is_data);
149         WARN_ON(!ext2->is_data);
150
151         /* parent and root are a union, so this covers both */
152         if (back1->parent > back2->parent)
153                 return 1;
154         if (back1->parent < back2->parent)
155                 return -1;
156
157         /* This is a full backref and the parents match. */
158         if (back1->node.full_backref)
159                 return 0;
160
161         if (back1->owner > back2->owner)
162                 return 1;
163         if (back1->owner < back2->owner)
164                 return -1;
165
166         if (back1->offset > back2->offset)
167                 return 1;
168         if (back1->offset < back2->offset)
169                 return -1;
170
171         if (back1->found_ref && back2->found_ref) {
172                 if (back1->disk_bytenr > back2->disk_bytenr)
173                         return 1;
174                 if (back1->disk_bytenr < back2->disk_bytenr)
175                         return -1;
176
177                 if (back1->bytes > back2->bytes)
178                         return 1;
179                 if (back1->bytes < back2->bytes)
180                         return -1;
181         }
182
183         return 0;
184 }
185
186 /*
187  * Much like data_backref, but without the undetermined members and linked
188  * through a list_head instead of an rb_node.
189  * During the extent scan it is stored in root->orphan_data_extent.
190  * During the fs tree scan it is then moved to the inode record's orphan list
 * (the `orphan_extents` member of struct inode_record).
191  */
192 struct orphan_data_extent {
193         struct list_head list;
194         u64 root;
195         u64 objectid;
196         u64 offset;
197         u64 disk_bytenr;
198         u64 disk_len;
199 };
200
/*
 * Backref record for a tree block.  to_tree_backref() recovers it from the
 * embedded `node` header; compare_tree_backref() orders records by the
 * parent/root union only.
 */
201 struct tree_backref {
202         struct extent_backref node;
203         union {
204                 u64 parent;
205                 u64 root;
206         };
207 };
208
209 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
210 {
211         return container_of(back, struct tree_backref, node);
212 }
213
214 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
215 {
216         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
217         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
218         struct tree_backref *back1 = to_tree_backref(ext1);
219         struct tree_backref *back2 = to_tree_backref(ext2);
220
221         WARN_ON(ext1->is_data);
222         WARN_ON(ext2->is_data);
223
224         /* parent and root are a union, so this covers both */
225         if (back1->parent > back2->parent)
226                 return 1;
227         if (back1->parent < back2->parent)
228                 return -1;
229
230         return 0;
231 }
232
233 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
234 {
235         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
236         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
237
238         if (ext1->is_data > ext2->is_data)
239                 return 1;
240
241         if (ext1->is_data < ext2->is_data)
242                 return -1;
243
244         if (ext1->full_backref > ext2->full_backref)
245                 return 1;
246         if (ext1->full_backref < ext2->full_backref)
247                 return -1;
248
249         if (ext1->is_data)
250                 return compare_data_backref(node1, node2);
251         else
252                 return compare_tree_backref(node1, node2);
253 }
254
255 /* Explicit "not yet known" value for extent_record::flag_block_full_backref */
256 enum { FLAG_UNSET = 2 };
257
/*
 * Per-extent bookkeeping record.  to_extent_record() recovers a record from
 * its `list` member.
 */
258 struct extent_record {
259         struct list_head backrefs;
260         struct list_head dups;
261         struct rb_root backref_tree;
262         struct list_head list;
263         struct cache_extent cache;
264         struct btrfs_disk_key parent_key;
265         u64 start;
266         u64 max_size;
267         u64 nr;
268         u64 refs;
269         u64 extent_item_refs;
270         u64 generation;
271         u64 parent_generation;
272         u64 info_objectid;
273         u32 num_duplicates;
274         u8 info_level;
/* two bits wide so that it can hold FLAG_UNSET (2) besides 0 and 1 */
275         unsigned int flag_block_full_backref:2;
276         unsigned int found_rec:1;
277         unsigned int content_checked:1;
278         unsigned int owner_ref_checked:1;
279         unsigned int is_root:1;
280         unsigned int metadata:1;
281         unsigned int bad_full_backref:1;
282         unsigned int crossing_stripes:1;
283         unsigned int wrong_chunk_type:1;
284 };
285
286 static inline struct extent_record* to_extent_record(struct list_head *entry)
287 {
288         return container_of(entry, struct extent_record, list);
289 }
290
/*
 * One name that refers to an inode, linked into inode_record::backrefs.
 * The name bytes are stored directly behind the structure: clone_inode_rec()
 * copies sizeof(struct inode_backref) + namelen + 1 bytes per entry.
 */
291 struct inode_backref {
292         struct list_head list;
293         unsigned int found_dir_item:1;
294         unsigned int found_dir_index:1;
295         unsigned int found_inode_ref:1;
296         u8 filetype;
297         u8 ref_type;
298         int errors;
299         u64 dir;
300         u64 index;
301         u16 namelen;
302         char name[0];
303 };
304
305 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
306 {
307         return list_entry(entry, struct inode_backref, list);
308 }
309
/*
 * List-linked record of a root item.
 * NOTE(review): the fields look like a snapshot of the corresponding
 * btrfs root item values - confirm against the code that fills them in.
 */
310 struct root_item_record {
311         struct list_head list;
312         u64 objectid;
313         u64 bytenr;
314         u64 last_snapshot;
315         u8 level;
316         u8 drop_level;
317         struct btrfs_key drop_key;
318 };
319
/*
 * Error bits describing a broken backref; print_ref_error() turns a mask of
 * these bits into text.
 */
320 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
321 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
322 #define REF_ERR_NO_INODE_REF            (1 << 2)
323 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
324 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
325 #define REF_ERR_DUP_INODE_REF           (1 << 5)
326 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
327 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
328 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
329 #define REF_ERR_NO_ROOT_REF             (1 << 9)
330 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
331 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
332 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
333
/*
 * A hole covering the byte range [start, start + len) of a file, kept in an
 * rb-tree that add_file_extent_hole() fills using compare_hole().
 */
334 struct file_extent_hole {
335         struct rb_node node;
336         u64 start;
337         u64 len;
338 };
339
/*
 * Everything collected about one inode during the check.
 *
 * `backrefs` links struct inode_backref entries, `orphan_extents` links
 * struct orphan_data_extent entries and `holes` is an rb-tree of
 * struct file_extent_hole (see clone_inode_rec(), which deep-copies all
 * three).  `errors` is a mask of I_ERR_* bits.
 */
340 struct inode_record {
341         struct list_head backrefs;
342         unsigned int checked:1;
343         unsigned int merging:1;
344         unsigned int found_inode_item:1;
345         unsigned int found_dir_item:1;
346         unsigned int found_file_extent:1;
347         unsigned int found_csum_item:1;
348         unsigned int some_csum_missing:1;
349         unsigned int nodatasum:1;
350         int errors;
351
352         u64 ino;
353         u32 nlink;
354         u32 imode;
355         u64 isize;
356         u64 nbytes;
357
358         u32 found_link;
359         u64 found_size;
360         u64 extent_start;
361         u64 extent_end;
362         struct rb_root holes;
363         struct list_head orphan_extents;
364
/* reference count; clone_inode_rec() starts every copy at 1 */
365         u32 refs;
366 };
367
/*
 * Error bits stored in inode_record::errors; print_inode_error() turns them
 * into text.
 */
368 #define I_ERR_NO_INODE_ITEM             (1 << 0)
369 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
370 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
371 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
372 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
373 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
374 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
375 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
376 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
377 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
378 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
379 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
380 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
381 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
382 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
383
/*
 * One reference to a root, linked through `list` (see to_root_backref()).
 * The layout mirrors struct inode_backref: flag bits, an error mask and a
 * trailing name of `namelen` bytes.
 * NOTE(review): presumably linked into root_record::backrefs - confirm.
 */
384 struct root_backref {
385         struct list_head list;
386         unsigned int found_dir_item:1;
387         unsigned int found_dir_index:1;
388         unsigned int found_back_ref:1;
389         unsigned int found_forward_ref:1;
390         unsigned int reachable:1;
391         int errors;
392         u64 ref_root;
393         u64 dir;
394         u64 index;
395         u16 namelen;
396         char name[0];
397 };
398
399 static inline struct root_backref* to_root_backref(struct list_head *entry)
400 {
401         return list_entry(entry, struct root_backref, list);
402 }
403
/*
 * Per-root bookkeeping record that owns a list of backrefs and can be stored
 * in a cache tree through its embedded `cache` member.
 */
404 struct root_record {
405         struct list_head backrefs;
406         struct cache_extent cache;
407         unsigned int found_root_item:1;
408         u64 objectid;
409         u32 found_ref;
410 };
411
/* Cache-tree entry that carries an opaque pointer as its payload. */
412 struct ptr_node {
413         struct cache_extent cache;
414         void *data;
415 };
416
/*
 * Cache-tree entry that carries its own root and inode cache trees, a
 * current inode record and a reference count.
 * NOTE(review): presumably tracks a tree block shared between roots while
 * walking fs trees (see walk_control::nodes) - confirm.
 */
417 struct shared_node {
418         struct cache_extent cache;
419         struct cache_tree root_cache;
420         struct cache_tree inode_cache;
421         struct inode_record *current;
422         u32 refs;
423 };
424
/* Start and size of one block. */
425 struct block_info {
426         u64 start;
427         u32 size;
428 };
429
/*
 * State of a tree walk: one shared_node slot per possible tree level
 * (BTRFS_MAX_LEVEL entries) plus the cache tree that holds the shared nodes.
 */
430 struct walk_control {
431         struct cache_tree shared;
432         struct shared_node *nodes[BTRFS_MAX_LEVEL];
433         int active_node;
434         int root_level;
435 };
436
/* List entry that identifies an item by its key and the id of its root. */
437 struct bad_item {
438         struct btrfs_key key;
439         u64 root_id;
440         struct list_head list;
441 };
442
/* List entry describing an extent by bytenr/bytes, with a counter and a broken flag. */
443 struct extent_entry {
444         u64 bytenr;
445         u64 bytes;
446         int count;
447         int broken;
448         struct list_head list;
449 };
450
/* Cache-tree entry holding what was found out about one root's top node. */
451 struct root_item_info {
452         /* level of the root */
453         u8 level;
454         /* number of nodes at this level, must be 1 for a root */
455         int node_count;
456         u64 bytenr;
457         u64 gen;
458         struct cache_extent cache_extent;
459 };
460
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual values yet; they are only
 * used internally for error classification.  Every flag owns a distinct bit
 * so that two different problems can never be mistaken for each other.
 * CROSSING_STRIPE_BOUNDARY used to share bit 4 with REFERENCER_MISMATCH and
 * now has a bit of its own.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
477
478 static void *print_status_check(void *p)
479 {
480         struct task_ctx *priv = p;
481         const char work_indicator[] = { '.', 'o', 'O', 'o' };
482         uint32_t count = 0;
483         static char *task_position_string[] = {
484                 "checking extents",
485                 "checking free space cache",
486                 "checking fs roots",
487         };
488
489         task_period_start(priv->info, 1000 /* 1s */);
490
491         if (priv->tp == TASK_NOTHING)
492                 return NULL;
493
494         while (1) {
495                 printf("%s [%c]\r", task_position_string[priv->tp],
496                                 work_indicator[count % 4]);
497                 count++;
498                 fflush(stdout);
499                 task_period_wait(priv->info);
500         }
501         return NULL;
502 }
503
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 * @p is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
511
512 static enum btrfs_check_mode parse_check_mode(const char *str)
513 {
514         if (strcmp(str, "lowmem") == 0)
515                 return CHECK_MODE_LOWMEM;
516         if (strcmp(str, "orig") == 0)
517                 return CHECK_MODE_ORIGINAL;
518         if (strcmp(str, "original") == 0)
519                 return CHECK_MODE_ORIGINAL;
520
521         return CHECK_MODE_UNKNOWN;
522 }
523
524 /* Compatible function to allow reuse of old codes */
525 static u64 first_extent_gap(struct rb_root *holes)
526 {
527         struct file_extent_hole *hole;
528
529         if (RB_EMPTY_ROOT(holes))
530                 return (u64)-1;
531
532         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
533         return hole->start;
534 }
535
536 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
537 {
538         struct file_extent_hole *hole1;
539         struct file_extent_hole *hole2;
540
541         hole1 = rb_entry(node1, struct file_extent_hole, node);
542         hole2 = rb_entry(node2, struct file_extent_hole, node);
543
544         if (hole1->start > hole2->start)
545                 return -1;
546         if (hole1->start < hole2->start)
547                 return 1;
548         /* Now hole1->start == hole2->start */
549         if (hole1->len >= hole2->len)
550                 /*
551                  * Hole 1 will be merge center
552                  * Same hole will be merged later
553                  */
554                 return -1;
555         /* Hole 2 will be merge center */
556         return 1;
557 }
558
559 /*
560  * Add a hole to the record
561  *
562  * This will do hole merge for copy_file_extent_holes(),
563  * which will ensure there won't be continuous holes.
564  */
565 static int add_file_extent_hole(struct rb_root *holes,
566                                 u64 start, u64 len)
567 {
568         struct file_extent_hole *hole;
569         struct file_extent_hole *prev = NULL;
570         struct file_extent_hole *next = NULL;
571
572         hole = malloc(sizeof(*hole));
573         if (!hole)
574                 return -ENOMEM;
575         hole->start = start;
576         hole->len = len;
577         /* Since compare will not return 0, no -EEXIST will happen */
578         rb_insert(holes, &hole->node, compare_hole);
579
580         /* simple merge with previous hole */
581         if (rb_prev(&hole->node))
582                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
583                                 node);
584         if (prev && prev->start + prev->len >= hole->start) {
585                 hole->len = hole->start + hole->len - prev->start;
586                 hole->start = prev->start;
587                 rb_erase(&prev->node, holes);
588                 free(prev);
589                 prev = NULL;
590         }
591
592         /* iterate merge with next holes */
593         while (1) {
594                 if (!rb_next(&hole->node))
595                         break;
596                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
597                                         node);
598                 if (hole->start + hole->len >= next->start) {
599                         if (hole->start + hole->len <= next->start + next->len)
600                                 hole->len = next->start + next->len -
601                                             hole->start;
602                         rb_erase(&next->node, holes);
603                         free(next);
604                         next = NULL;
605                 } else
606                         break;
607         }
608         return 0;
609 }
610
611 static int compare_hole_range(struct rb_node *node, void *data)
612 {
613         struct file_extent_hole *hole;
614         u64 start;
615
616         hole = (struct file_extent_hole *)data;
617         start = hole->start;
618
619         hole = rb_entry(node, struct file_extent_hole, node);
620         if (start < hole->start)
621                 return -1;
622         if (start >= hole->start && start < hole->start + hole->len)
623                 return 0;
624         return 1;
625 }
626
627 /*
628  * Delete a hole in the record
629  *
630  * This will do the hole split and is much restrict than add.
631  */
632 static int del_file_extent_hole(struct rb_root *holes,
633                                 u64 start, u64 len)
634 {
635         struct file_extent_hole *hole;
636         struct file_extent_hole tmp;
637         u64 prev_start = 0;
638         u64 prev_len = 0;
639         u64 next_start = 0;
640         u64 next_len = 0;
641         struct rb_node *node;
642         int have_prev = 0;
643         int have_next = 0;
644         int ret = 0;
645
646         tmp.start = start;
647         tmp.len = len;
648         node = rb_search(holes, &tmp, compare_hole_range, NULL);
649         if (!node)
650                 return -EEXIST;
651         hole = rb_entry(node, struct file_extent_hole, node);
652         if (start + len > hole->start + hole->len)
653                 return -EEXIST;
654
655         /*
656          * Now there will be no overlap, delete the hole and re-add the
657          * split(s) if they exists.
658          */
659         if (start > hole->start) {
660                 prev_start = hole->start;
661                 prev_len = start - hole->start;
662                 have_prev = 1;
663         }
664         if (hole->start + hole->len > start + len) {
665                 next_start = start + len;
666                 next_len = hole->start + hole->len - start - len;
667                 have_next = 1;
668         }
669         rb_erase(node, holes);
670         free(hole);
671         if (have_prev) {
672                 ret = add_file_extent_hole(holes, prev_start, prev_len);
673                 if (ret < 0)
674                         return ret;
675         }
676         if (have_next) {
677                 ret = add_file_extent_hole(holes, next_start, next_len);
678                 if (ret < 0)
679                         return ret;
680         }
681         return 0;
682 }
683
684 static int copy_file_extent_holes(struct rb_root *dst,
685                                   struct rb_root *src)
686 {
687         struct file_extent_hole *hole;
688         struct rb_node *node;
689         int ret = 0;
690
691         node = rb_first(src);
692         while (node) {
693                 hole = rb_entry(node, struct file_extent_hole, node);
694                 ret = add_file_extent_hole(dst, hole->start, hole->len);
695                 if (ret)
696                         break;
697                 node = rb_next(node);
698         }
699         return ret;
700 }
701
702 static void free_file_extent_holes(struct rb_root *holes)
703 {
704         struct rb_node *node;
705         struct file_extent_hole *hole;
706
707         node = rb_first(holes);
708         while (node) {
709                 hole = rb_entry(node, struct file_extent_hole, node);
710                 rb_erase(node, holes);
711                 free(hole);
712                 node = rb_first(holes);
713         }
714 }
715
716 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
717
718 static void record_root_in_trans(struct btrfs_trans_handle *trans,
719                                  struct btrfs_root *root)
720 {
721         if (root->last_trans != trans->transid) {
722                 root->track_dirty = 1;
723                 root->last_trans = trans->transid;
724                 root->commit_root = root->node;
725                 extent_buffer_get(root->node);
726         }
727 }
728
729 static u8 imode_to_type(u32 imode)
730 {
731 #define S_SHIFT 12
732         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
733                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
734                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
735                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
736                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
737                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
738                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
739                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
740         };
741
742         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
743 #undef S_SHIFT
744 }
745
746 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
747 {
748         struct device_record *rec1;
749         struct device_record *rec2;
750
751         rec1 = rb_entry(node1, struct device_record, node);
752         rec2 = rb_entry(node2, struct device_record, node);
753         if (rec1->devid > rec2->devid)
754                 return -1;
755         else if (rec1->devid < rec2->devid)
756                 return 1;
757         else
758                 return 0;
759 }
760
761 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
762 {
763         struct inode_record *rec;
764         struct inode_backref *backref;
765         struct inode_backref *orig;
766         struct inode_backref *tmp;
767         struct orphan_data_extent *src_orphan;
768         struct orphan_data_extent *dst_orphan;
769         struct rb_node *rb;
770         size_t size;
771         int ret;
772
773         rec = malloc(sizeof(*rec));
774         if (!rec)
775                 return ERR_PTR(-ENOMEM);
776         memcpy(rec, orig_rec, sizeof(*rec));
777         rec->refs = 1;
778         INIT_LIST_HEAD(&rec->backrefs);
779         INIT_LIST_HEAD(&rec->orphan_extents);
780         rec->holes = RB_ROOT;
781
782         list_for_each_entry(orig, &orig_rec->backrefs, list) {
783                 size = sizeof(*orig) + orig->namelen + 1;
784                 backref = malloc(size);
785                 if (!backref) {
786                         ret = -ENOMEM;
787                         goto cleanup;
788                 }
789                 memcpy(backref, orig, size);
790                 list_add_tail(&backref->list, &rec->backrefs);
791         }
792         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
793                 dst_orphan = malloc(sizeof(*dst_orphan));
794                 if (!dst_orphan) {
795                         ret = -ENOMEM;
796                         goto cleanup;
797                 }
798                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
799                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
800         }
801         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
802         if (ret < 0)
803                 goto cleanup_rb;
804
805         return rec;
806
807 cleanup_rb:
808         rb = rb_first(&rec->holes);
809         while (rb) {
810                 struct file_extent_hole *hole;
811
812                 hole = rb_entry(rb, struct file_extent_hole, node);
813                 rb = rb_next(rb);
814                 free(hole);
815         }
816
817 cleanup:
818         if (!list_empty(&rec->backrefs))
819                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
820                         list_del(&orig->list);
821                         free(orig);
822                 }
823
824         if (!list_empty(&rec->orphan_extents))
825                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
826                         list_del(&orig->list);
827                         free(orig);
828                 }
829
830         free(rec);
831
832         return ERR_PTR(ret);
833 }
834
835 static void print_orphan_data_extents(struct list_head *orphan_extents,
836                                       u64 objectid)
837 {
838         struct orphan_data_extent *orphan;
839
840         if (list_empty(orphan_extents))
841                 return;
842         printf("The following data extent is lost in tree %llu:\n",
843                objectid);
844         list_for_each_entry(orphan, orphan_extents, list) {
845                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
846                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
847                        orphan->disk_len);
848         }
849 }
850
851 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
852 {
853         u64 root_objectid = root->root_key.objectid;
854         int errors = rec->errors;
855
856         if (!errors)
857                 return;
858         /* reloc root errors, we print its corresponding fs root objectid*/
859         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
860                 root_objectid = root->root_key.offset;
861                 fprintf(stderr, "reloc");
862         }
863         fprintf(stderr, "root %llu inode %llu errors %x",
864                 (unsigned long long) root_objectid,
865                 (unsigned long long) rec->ino, rec->errors);
866
867         if (errors & I_ERR_NO_INODE_ITEM)
868                 fprintf(stderr, ", no inode item");
869         if (errors & I_ERR_NO_ORPHAN_ITEM)
870                 fprintf(stderr, ", no orphan item");
871         if (errors & I_ERR_DUP_INODE_ITEM)
872                 fprintf(stderr, ", dup inode item");
873         if (errors & I_ERR_DUP_DIR_INDEX)
874                 fprintf(stderr, ", dup dir index");
875         if (errors & I_ERR_ODD_DIR_ITEM)
876                 fprintf(stderr, ", odd dir item");
877         if (errors & I_ERR_ODD_FILE_EXTENT)
878                 fprintf(stderr, ", odd file extent");
879         if (errors & I_ERR_BAD_FILE_EXTENT)
880                 fprintf(stderr, ", bad file extent");
881         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
882                 fprintf(stderr, ", file extent overlap");
883         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
884                 fprintf(stderr, ", file extent discount");
885         if (errors & I_ERR_DIR_ISIZE_WRONG)
886                 fprintf(stderr, ", dir isize wrong");
887         if (errors & I_ERR_FILE_NBYTES_WRONG)
888                 fprintf(stderr, ", nbytes wrong");
889         if (errors & I_ERR_ODD_CSUM_ITEM)
890                 fprintf(stderr, ", odd csum item");
891         if (errors & I_ERR_SOME_CSUM_MISSING)
892                 fprintf(stderr, ", some csum missing");
893         if (errors & I_ERR_LINK_COUNT_WRONG)
894                 fprintf(stderr, ", link count wrong");
895         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
896                 fprintf(stderr, ", orphan file extent");
897         fprintf(stderr, "\n");
898         /* Print the orphan extents if needed */
899         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
900                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
901
902         /* Print the holes if needed */
903         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
904                 struct file_extent_hole *hole;
905                 struct rb_node *node;
906                 int found = 0;
907
908                 node = rb_first(&rec->holes);
909                 fprintf(stderr, "Found file extent holes:\n");
910                 while (node) {
911                         found = 1;
912                         hole = rb_entry(node, struct file_extent_hole, node);
913                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
914                                 hole->start, hole->len);
915                         node = rb_next(node);
916                 }
917                 if (!found)
918                         fprintf(stderr, "\tstart: 0, len: %llu\n",
919                                 round_up(rec->isize,
920                                          root->fs_info->sectorsize));
921         }
922 }
923
924 static void print_ref_error(int errors)
925 {
926         if (errors & REF_ERR_NO_DIR_ITEM)
927                 fprintf(stderr, ", no dir item");
928         if (errors & REF_ERR_NO_DIR_INDEX)
929                 fprintf(stderr, ", no dir index");
930         if (errors & REF_ERR_NO_INODE_REF)
931                 fprintf(stderr, ", no inode ref");
932         if (errors & REF_ERR_DUP_DIR_ITEM)
933                 fprintf(stderr, ", dup dir item");
934         if (errors & REF_ERR_DUP_DIR_INDEX)
935                 fprintf(stderr, ", dup dir index");
936         if (errors & REF_ERR_DUP_INODE_REF)
937                 fprintf(stderr, ", dup inode ref");
938         if (errors & REF_ERR_INDEX_UNMATCH)
939                 fprintf(stderr, ", index mismatch");
940         if (errors & REF_ERR_FILETYPE_UNMATCH)
941                 fprintf(stderr, ", filetype mismatch");
942         if (errors & REF_ERR_NAME_TOO_LONG)
943                 fprintf(stderr, ", name too long");
944         if (errors & REF_ERR_NO_ROOT_REF)
945                 fprintf(stderr, ", no root ref");
946         if (errors & REF_ERR_NO_ROOT_BACKREF)
947                 fprintf(stderr, ", no root backref");
948         if (errors & REF_ERR_DUP_ROOT_REF)
949                 fprintf(stderr, ", dup root ref");
950         if (errors & REF_ERR_DUP_ROOT_BACKREF)
951                 fprintf(stderr, ", dup root backref");
952         fprintf(stderr, "\n");
953 }
954
955 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
956                                           u64 ino, int mod)
957 {
958         struct ptr_node *node;
959         struct cache_extent *cache;
960         struct inode_record *rec = NULL;
961         int ret;
962
963         cache = lookup_cache_extent(inode_cache, ino, 1);
964         if (cache) {
965                 node = container_of(cache, struct ptr_node, cache);
966                 rec = node->data;
967                 if (mod && rec->refs > 1) {
968                         node->data = clone_inode_rec(rec);
969                         if (IS_ERR(node->data))
970                                 return node->data;
971                         rec->refs--;
972                         rec = node->data;
973                 }
974         } else if (mod) {
975                 rec = calloc(1, sizeof(*rec));
976                 if (!rec)
977                         return ERR_PTR(-ENOMEM);
978                 rec->ino = ino;
979                 rec->extent_start = (u64)-1;
980                 rec->refs = 1;
981                 INIT_LIST_HEAD(&rec->backrefs);
982                 INIT_LIST_HEAD(&rec->orphan_extents);
983                 rec->holes = RB_ROOT;
984
985                 node = malloc(sizeof(*node));
986                 if (!node) {
987                         free(rec);
988                         return ERR_PTR(-ENOMEM);
989                 }
990                 node->cache.start = ino;
991                 node->cache.size = 1;
992                 node->data = rec;
993
994                 if (ino == BTRFS_FREE_INO_OBJECTID)
995                         rec->found_link = 1;
996
997                 ret = insert_cache_extent(inode_cache, &node->cache);
998                 if (ret)
999                         return ERR_PTR(-EEXIST);
1000         }
1001         return rec;
1002 }
1003
1004 static void free_orphan_data_extents(struct list_head *orphan_extents)
1005 {
1006         struct orphan_data_extent *orphan;
1007
1008         while (!list_empty(orphan_extents)) {
1009                 orphan = list_entry(orphan_extents->next,
1010                                     struct orphan_data_extent, list);
1011                 list_del(&orphan->list);
1012                 free(orphan);
1013         }
1014 }
1015
1016 static void free_inode_rec(struct inode_record *rec)
1017 {
1018         struct inode_backref *backref;
1019
1020         if (--rec->refs > 0)
1021                 return;
1022
1023         while (!list_empty(&rec->backrefs)) {
1024                 backref = to_inode_backref(rec->backrefs.next);
1025                 list_del(&backref->list);
1026                 free(backref);
1027         }
1028         free_orphan_data_extents(&rec->orphan_extents);
1029         free_file_extent_holes(&rec->holes);
1030         free(rec);
1031 }
1032
1033 static int can_free_inode_rec(struct inode_record *rec)
1034 {
1035         if (!rec->errors && rec->checked && rec->found_inode_item &&
1036             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1037                 return 1;
1038         return 0;
1039 }
1040
1041 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1042                                  struct inode_record *rec)
1043 {
1044         struct cache_extent *cache;
1045         struct inode_backref *tmp, *backref;
1046         struct ptr_node *node;
1047         u8 filetype;
1048
1049         if (!rec->found_inode_item)
1050                 return;
1051
1052         filetype = imode_to_type(rec->imode);
1053         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1054                 if (backref->found_dir_item && backref->found_dir_index) {
1055                         if (backref->filetype != filetype)
1056                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1057                         if (!backref->errors && backref->found_inode_ref &&
1058                             rec->nlink == rec->found_link) {
1059                                 list_del(&backref->list);
1060                                 free(backref);
1061                         }
1062                 }
1063         }
1064
1065         if (!rec->checked || rec->merging)
1066                 return;
1067
1068         if (S_ISDIR(rec->imode)) {
1069                 if (rec->found_size != rec->isize)
1070                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1071                 if (rec->found_file_extent)
1072                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1073         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1074                 if (rec->found_dir_item)
1075                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1076                 if (rec->found_size != rec->nbytes)
1077                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1078                 if (rec->nlink > 0 && !no_holes &&
1079                     (rec->extent_end < rec->isize ||
1080                      first_extent_gap(&rec->holes) < rec->isize))
1081                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1082         }
1083
1084         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1085                 if (rec->found_csum_item && rec->nodatasum)
1086                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1087                 if (rec->some_csum_missing && !rec->nodatasum)
1088                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1089         }
1090
1091         BUG_ON(rec->refs != 1);
1092         if (can_free_inode_rec(rec)) {
1093                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1094                 node = container_of(cache, struct ptr_node, cache);
1095                 BUG_ON(node->data != rec);
1096                 remove_cache_extent(inode_cache, &node->cache);
1097                 free(node);
1098                 free_inode_rec(rec);
1099         }
1100 }
1101
1102 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1103 {
1104         struct btrfs_path path;
1105         struct btrfs_key key;
1106         int ret;
1107
1108         key.objectid = BTRFS_ORPHAN_OBJECTID;
1109         key.type = BTRFS_ORPHAN_ITEM_KEY;
1110         key.offset = ino;
1111
1112         btrfs_init_path(&path);
1113         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1114         btrfs_release_path(&path);
1115         if (ret > 0)
1116                 ret = -ENOENT;
1117         return ret;
1118 }
1119
1120 static int process_inode_item(struct extent_buffer *eb,
1121                               int slot, struct btrfs_key *key,
1122                               struct shared_node *active_node)
1123 {
1124         struct inode_record *rec;
1125         struct btrfs_inode_item *item;
1126
1127         rec = active_node->current;
1128         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1129         if (rec->found_inode_item) {
1130                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1131                 return 1;
1132         }
1133         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1134         rec->nlink = btrfs_inode_nlink(eb, item);
1135         rec->isize = btrfs_inode_size(eb, item);
1136         rec->nbytes = btrfs_inode_nbytes(eb, item);
1137         rec->imode = btrfs_inode_mode(eb, item);
1138         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1139                 rec->nodatasum = 1;
1140         rec->found_inode_item = 1;
1141         if (rec->nlink == 0)
1142                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1143         maybe_free_inode_rec(&active_node->inode_cache, rec);
1144         return 0;
1145 }
1146
1147 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1148                                                 const char *name,
1149                                                 int namelen, u64 dir)
1150 {
1151         struct inode_backref *backref;
1152
1153         list_for_each_entry(backref, &rec->backrefs, list) {
1154                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1155                         break;
1156                 if (backref->dir != dir || backref->namelen != namelen)
1157                         continue;
1158                 if (memcmp(name, backref->name, namelen))
1159                         continue;
1160                 return backref;
1161         }
1162
1163         backref = malloc(sizeof(*backref) + namelen + 1);
1164         if (!backref)
1165                 return NULL;
1166         memset(backref, 0, sizeof(*backref));
1167         backref->dir = dir;
1168         backref->namelen = namelen;
1169         memcpy(backref->name, name, namelen);
1170         backref->name[namelen] = '\0';
1171         list_add_tail(&backref->list, &rec->backrefs);
1172         return backref;
1173 }
1174
1175 static int add_inode_backref(struct cache_tree *inode_cache,
1176                              u64 ino, u64 dir, u64 index,
1177                              const char *name, int namelen,
1178                              u8 filetype, u8 itemtype, int errors)
1179 {
1180         struct inode_record *rec;
1181         struct inode_backref *backref;
1182
1183         rec = get_inode_rec(inode_cache, ino, 1);
1184         BUG_ON(IS_ERR(rec));
1185         backref = get_inode_backref(rec, name, namelen, dir);
1186         BUG_ON(!backref);
1187         if (errors)
1188                 backref->errors |= errors;
1189         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1190                 if (backref->found_dir_index)
1191                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1192                 if (backref->found_inode_ref && backref->index != index)
1193                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1194                 if (backref->found_dir_item && backref->filetype != filetype)
1195                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1196
1197                 backref->index = index;
1198                 backref->filetype = filetype;
1199                 backref->found_dir_index = 1;
1200         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1201                 rec->found_link++;
1202                 if (backref->found_dir_item)
1203                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1204                 if (backref->found_dir_index && backref->filetype != filetype)
1205                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1206
1207                 backref->filetype = filetype;
1208                 backref->found_dir_item = 1;
1209         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1210                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1211                 if (backref->found_inode_ref)
1212                         backref->errors |= REF_ERR_DUP_INODE_REF;
1213                 if (backref->found_dir_index && backref->index != index)
1214                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1215                 else
1216                         backref->index = index;
1217
1218                 backref->ref_type = itemtype;
1219                 backref->found_inode_ref = 1;
1220         } else {
1221                 BUG_ON(1);
1222         }
1223
1224         maybe_free_inode_rec(inode_cache, rec);
1225         return 0;
1226 }
1227
1228 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1229                             struct cache_tree *dst_cache)
1230 {
1231         struct inode_backref *backref;
1232         u32 dir_count = 0;
1233         int ret = 0;
1234
1235         dst->merging = 1;
1236         list_for_each_entry(backref, &src->backrefs, list) {
1237                 if (backref->found_dir_index) {
1238                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1239                                         backref->index, backref->name,
1240                                         backref->namelen, backref->filetype,
1241                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1242                 }
1243                 if (backref->found_dir_item) {
1244                         dir_count++;
1245                         add_inode_backref(dst_cache, dst->ino,
1246                                         backref->dir, 0, backref->name,
1247                                         backref->namelen, backref->filetype,
1248                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1249                 }
1250                 if (backref->found_inode_ref) {
1251                         add_inode_backref(dst_cache, dst->ino,
1252                                         backref->dir, backref->index,
1253                                         backref->name, backref->namelen, 0,
1254                                         backref->ref_type, backref->errors);
1255                 }
1256         }
1257
1258         if (src->found_dir_item)
1259                 dst->found_dir_item = 1;
1260         if (src->found_file_extent)
1261                 dst->found_file_extent = 1;
1262         if (src->found_csum_item)
1263                 dst->found_csum_item = 1;
1264         if (src->some_csum_missing)
1265                 dst->some_csum_missing = 1;
1266         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1267                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1268                 if (ret < 0)
1269                         return ret;
1270         }
1271
1272         BUG_ON(src->found_link < dir_count);
1273         dst->found_link += src->found_link - dir_count;
1274         dst->found_size += src->found_size;
1275         if (src->extent_start != (u64)-1) {
1276                 if (dst->extent_start == (u64)-1) {
1277                         dst->extent_start = src->extent_start;
1278                         dst->extent_end = src->extent_end;
1279                 } else {
1280                         if (dst->extent_end > src->extent_start)
1281                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1282                         else if (dst->extent_end < src->extent_start) {
1283                                 ret = add_file_extent_hole(&dst->holes,
1284                                         dst->extent_end,
1285                                         src->extent_start - dst->extent_end);
1286                         }
1287                         if (dst->extent_end < src->extent_end)
1288                                 dst->extent_end = src->extent_end;
1289                 }
1290         }
1291
1292         dst->errors |= src->errors;
1293         if (src->found_inode_item) {
1294                 if (!dst->found_inode_item) {
1295                         dst->nlink = src->nlink;
1296                         dst->isize = src->isize;
1297                         dst->nbytes = src->nbytes;
1298                         dst->imode = src->imode;
1299                         dst->nodatasum = src->nodatasum;
1300                         dst->found_inode_item = 1;
1301                 } else {
1302                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1303                 }
1304         }
1305         dst->merging = 0;
1306
1307         return 0;
1308 }
1309
1310 static int splice_shared_node(struct shared_node *src_node,
1311                               struct shared_node *dst_node)
1312 {
1313         struct cache_extent *cache;
1314         struct ptr_node *node, *ins;
1315         struct cache_tree *src, *dst;
1316         struct inode_record *rec, *conflict;
1317         u64 current_ino = 0;
1318         int splice = 0;
1319         int ret;
1320
1321         if (--src_node->refs == 0)
1322                 splice = 1;
1323         if (src_node->current)
1324                 current_ino = src_node->current->ino;
1325
1326         src = &src_node->root_cache;
1327         dst = &dst_node->root_cache;
1328 again:
1329         cache = search_cache_extent(src, 0);
1330         while (cache) {
1331                 node = container_of(cache, struct ptr_node, cache);
1332                 rec = node->data;
1333                 cache = next_cache_extent(cache);
1334
1335                 if (splice) {
1336                         remove_cache_extent(src, &node->cache);
1337                         ins = node;
1338                 } else {
1339                         ins = malloc(sizeof(*ins));
1340                         BUG_ON(!ins);
1341                         ins->cache.start = node->cache.start;
1342                         ins->cache.size = node->cache.size;
1343                         ins->data = rec;
1344                         rec->refs++;
1345                 }
1346                 ret = insert_cache_extent(dst, &ins->cache);
1347                 if (ret == -EEXIST) {
1348                         conflict = get_inode_rec(dst, rec->ino, 1);
1349                         BUG_ON(IS_ERR(conflict));
1350                         merge_inode_recs(rec, conflict, dst);
1351                         if (rec->checked) {
1352                                 conflict->checked = 1;
1353                                 if (dst_node->current == conflict)
1354                                         dst_node->current = NULL;
1355                         }
1356                         maybe_free_inode_rec(dst, conflict);
1357                         free_inode_rec(rec);
1358                         free(ins);
1359                 } else {
1360                         BUG_ON(ret);
1361                 }
1362         }
1363
1364         if (src == &src_node->root_cache) {
1365                 src = &src_node->inode_cache;
1366                 dst = &dst_node->inode_cache;
1367                 goto again;
1368         }
1369
1370         if (current_ino > 0 && (!dst_node->current ||
1371             current_ino > dst_node->current->ino)) {
1372                 if (dst_node->current) {
1373                         dst_node->current->checked = 1;
1374                         maybe_free_inode_rec(dst, dst_node->current);
1375                 }
1376                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1377                 BUG_ON(IS_ERR(dst_node->current));
1378         }
1379         return 0;
1380 }
1381
1382 static void free_inode_ptr(struct cache_extent *cache)
1383 {
1384         struct ptr_node *node;
1385         struct inode_record *rec;
1386
1387         node = container_of(cache, struct ptr_node, cache);
1388         rec = node->data;
1389         free_inode_rec(rec);
1390         free(node);
1391 }
1392
1393 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1394
1395 static struct shared_node *find_shared_node(struct cache_tree *shared,
1396                                             u64 bytenr)
1397 {
1398         struct cache_extent *cache;
1399         struct shared_node *node;
1400
1401         cache = lookup_cache_extent(shared, bytenr, 1);
1402         if (cache) {
1403                 node = container_of(cache, struct shared_node, cache);
1404                 return node;
1405         }
1406         return NULL;
1407 }
1408
1409 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1410 {
1411         int ret;
1412         struct shared_node *node;
1413
1414         node = calloc(1, sizeof(*node));
1415         if (!node)
1416                 return -ENOMEM;
1417         node->cache.start = bytenr;
1418         node->cache.size = 1;
1419         cache_tree_init(&node->root_cache);
1420         cache_tree_init(&node->inode_cache);
1421         node->refs = refs;
1422
1423         ret = insert_cache_extent(shared, &node->cache);
1424
1425         return ret;
1426 }
1427
1428 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1429                              struct walk_control *wc, int level)
1430 {
1431         struct shared_node *node;
1432         struct shared_node *dest;
1433         int ret;
1434
1435         if (level == wc->active_node)
1436                 return 0;
1437
1438         BUG_ON(wc->active_node <= level);
1439         node = find_shared_node(&wc->shared, bytenr);
1440         if (!node) {
1441                 ret = add_shared_node(&wc->shared, bytenr, refs);
1442                 BUG_ON(ret);
1443                 node = find_shared_node(&wc->shared, bytenr);
1444                 wc->nodes[level] = node;
1445                 wc->active_node = level;
1446                 return 0;
1447         }
1448
1449         if (wc->root_level == wc->active_node &&
1450             btrfs_root_refs(&root->root_item) == 0) {
1451                 if (--node->refs == 0) {
1452                         free_inode_recs_tree(&node->root_cache);
1453                         free_inode_recs_tree(&node->inode_cache);
1454                         remove_cache_extent(&wc->shared, &node->cache);
1455                         free(node);
1456                 }
1457                 return 1;
1458         }
1459
1460         dest = wc->nodes[wc->active_node];
1461         splice_shared_node(node, dest);
1462         if (node->refs == 0) {
1463                 remove_cache_extent(&wc->shared, &node->cache);
1464                 free(node);
1465         }
1466         return 1;
1467 }
1468
1469 static int leave_shared_node(struct btrfs_root *root,
1470                              struct walk_control *wc, int level)
1471 {
1472         struct shared_node *node;
1473         struct shared_node *dest;
1474         int i;
1475
1476         if (level == wc->root_level)
1477                 return 0;
1478
1479         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1480                 if (wc->nodes[i])
1481                         break;
1482         }
1483         BUG_ON(i >= BTRFS_MAX_LEVEL);
1484
1485         node = wc->nodes[wc->active_node];
1486         wc->nodes[wc->active_node] = NULL;
1487         wc->active_node = i;
1488
1489         dest = wc->nodes[wc->active_node];
1490         if (wc->active_node < wc->root_level ||
1491             btrfs_root_refs(&root->root_item) > 0) {
1492                 BUG_ON(node->refs <= 1);
1493                 splice_shared_node(node, dest);
1494         } else {
1495                 BUG_ON(node->refs < 2);
1496                 node->refs--;
1497         }
1498         return 0;
1499 }
1500
1501 /*
1502  * Returns:
1503  * < 0 - on error
1504  * 1   - if the root with id child_root_id is a child of root parent_root_id
1505  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1506  *       has other root(s) as parent(s)
1507  * 2   - if the root child_root_id doesn't have any parent roots
1508  */
1509 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1510                          u64 child_root_id)
1511 {
1512         struct btrfs_path path;
1513         struct btrfs_key key;
1514         struct extent_buffer *leaf;
1515         int has_parent = 0;
1516         int ret;
1517
1518         btrfs_init_path(&path);
1519
1520         key.objectid = parent_root_id;
1521         key.type = BTRFS_ROOT_REF_KEY;
1522         key.offset = child_root_id;
1523         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1524                                 0, 0);
1525         if (ret < 0)
1526                 return ret;
1527         btrfs_release_path(&path);
1528         if (!ret)
1529                 return 1;
1530
1531         key.objectid = child_root_id;
1532         key.type = BTRFS_ROOT_BACKREF_KEY;
1533         key.offset = 0;
1534         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1535                                 0, 0);
1536         if (ret < 0)
1537                 goto out;
1538
1539         while (1) {
1540                 leaf = path.nodes[0];
1541                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1542                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1543                         if (ret)
1544                                 break;
1545                         leaf = path.nodes[0];
1546                 }
1547
1548                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1549                 if (key.objectid != child_root_id ||
1550                     key.type != BTRFS_ROOT_BACKREF_KEY)
1551                         break;
1552
1553                 has_parent = 1;
1554
1555                 if (key.offset == parent_root_id) {
1556                         btrfs_release_path(&path);
1557                         return 1;
1558                 }
1559
1560                 path.slots[0]++;
1561         }
1562 out:
1563         btrfs_release_path(&path);
1564         if (ret < 0)
1565                 return ret;
1566         return has_parent ? 0 : 2;
1567 }
1568
1569 static int process_dir_item(struct extent_buffer *eb,
1570                             int slot, struct btrfs_key *key,
1571                             struct shared_node *active_node)
1572 {
1573         u32 total;
1574         u32 cur = 0;
1575         u32 len;
1576         u32 name_len;
1577         u32 data_len;
1578         int error;
1579         int nritems = 0;
1580         u8 filetype;
1581         struct btrfs_dir_item *di;
1582         struct inode_record *rec;
1583         struct cache_tree *root_cache;
1584         struct cache_tree *inode_cache;
1585         struct btrfs_key location;
1586         char namebuf[BTRFS_NAME_LEN];
1587
1588         root_cache = &active_node->root_cache;
1589         inode_cache = &active_node->inode_cache;
1590         rec = active_node->current;
1591         rec->found_dir_item = 1;
1592
1593         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1594         total = btrfs_item_size_nr(eb, slot);
1595         while (cur < total) {
1596                 nritems++;
1597                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1598                 name_len = btrfs_dir_name_len(eb, di);
1599                 data_len = btrfs_dir_data_len(eb, di);
1600                 filetype = btrfs_dir_type(eb, di);
1601
1602                 rec->found_size += name_len;
1603                 if (cur + sizeof(*di) + name_len > total ||
1604                     name_len > BTRFS_NAME_LEN) {
1605                         error = REF_ERR_NAME_TOO_LONG;
1606
1607                         if (cur + sizeof(*di) > total)
1608                                 break;
1609                         len = min_t(u32, total - cur - sizeof(*di),
1610                                     BTRFS_NAME_LEN);
1611                 } else {
1612                         len = name_len;
1613                         error = 0;
1614                 }
1615
1616                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1617
1618                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1619                     key->offset != btrfs_name_hash(namebuf, len)) {
1620                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1621                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1622                         key->objectid, key->offset, namebuf, len, filetype,
1623                         key->offset, btrfs_name_hash(namebuf, len));
1624                 }
1625
1626                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1627                         add_inode_backref(inode_cache, location.objectid,
1628                                           key->objectid, key->offset, namebuf,
1629                                           len, filetype, key->type, error);
1630                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1631                         add_inode_backref(root_cache, location.objectid,
1632                                           key->objectid, key->offset,
1633                                           namebuf, len, filetype,
1634                                           key->type, error);
1635                 } else {
1636                         fprintf(stderr, "invalid location in dir item %u\n",
1637                                 location.type);
1638                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1639                                           key->objectid, key->offset, namebuf,
1640                                           len, filetype, key->type, error);
1641                 }
1642
1643                 len = sizeof(*di) + name_len + data_len;
1644                 di = (struct btrfs_dir_item *)((char *)di + len);
1645                 cur += len;
1646         }
1647         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1648                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1649
1650         return 0;
1651 }
1652
1653 static int process_inode_ref(struct extent_buffer *eb,
1654                              int slot, struct btrfs_key *key,
1655                              struct shared_node *active_node)
1656 {
1657         u32 total;
1658         u32 cur = 0;
1659         u32 len;
1660         u32 name_len;
1661         u64 index;
1662         int error;
1663         struct cache_tree *inode_cache;
1664         struct btrfs_inode_ref *ref;
1665         char namebuf[BTRFS_NAME_LEN];
1666
1667         inode_cache = &active_node->inode_cache;
1668
1669         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1670         total = btrfs_item_size_nr(eb, slot);
1671         while (cur < total) {
1672                 name_len = btrfs_inode_ref_name_len(eb, ref);
1673                 index = btrfs_inode_ref_index(eb, ref);
1674
1675                 /* inode_ref + namelen should not cross item boundary */
1676                 if (cur + sizeof(*ref) + name_len > total ||
1677                     name_len > BTRFS_NAME_LEN) {
1678                         if (total < cur + sizeof(*ref))
1679                                 break;
1680
1681                         /* Still try to read out the remaining part */
1682                         len = min_t(u32, total - cur - sizeof(*ref),
1683                                     BTRFS_NAME_LEN);
1684                         error = REF_ERR_NAME_TOO_LONG;
1685                 } else {
1686                         len = name_len;
1687                         error = 0;
1688                 }
1689
1690                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1691                 add_inode_backref(inode_cache, key->objectid, key->offset,
1692                                   index, namebuf, len, 0, key->type, error);
1693
1694                 len = sizeof(*ref) + name_len;
1695                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1696                 cur += len;
1697         }
1698         return 0;
1699 }
1700
1701 static int process_inode_extref(struct extent_buffer *eb,
1702                                 int slot, struct btrfs_key *key,
1703                                 struct shared_node *active_node)
1704 {
1705         u32 total;
1706         u32 cur = 0;
1707         u32 len;
1708         u32 name_len;
1709         u64 index;
1710         u64 parent;
1711         int error;
1712         struct cache_tree *inode_cache;
1713         struct btrfs_inode_extref *extref;
1714         char namebuf[BTRFS_NAME_LEN];
1715
1716         inode_cache = &active_node->inode_cache;
1717
1718         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1719         total = btrfs_item_size_nr(eb, slot);
1720         while (cur < total) {
1721                 name_len = btrfs_inode_extref_name_len(eb, extref);
1722                 index = btrfs_inode_extref_index(eb, extref);
1723                 parent = btrfs_inode_extref_parent(eb, extref);
1724                 if (name_len <= BTRFS_NAME_LEN) {
1725                         len = name_len;
1726                         error = 0;
1727                 } else {
1728                         len = BTRFS_NAME_LEN;
1729                         error = REF_ERR_NAME_TOO_LONG;
1730                 }
1731                 read_extent_buffer(eb, namebuf,
1732                                    (unsigned long)(extref + 1), len);
1733                 add_inode_backref(inode_cache, key->objectid, parent,
1734                                   index, namebuf, len, 0, key->type, error);
1735
1736                 len = sizeof(*extref) + name_len;
1737                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1738                 cur += len;
1739         }
1740         return 0;
1741
1742 }
1743
1744 static int count_csum_range(struct btrfs_root *root, u64 start,
1745                             u64 len, u64 *found)
1746 {
1747         struct btrfs_key key;
1748         struct btrfs_path path;
1749         struct extent_buffer *leaf;
1750         int ret;
1751         size_t size;
1752         *found = 0;
1753         u64 csum_end;
1754         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1755
1756         btrfs_init_path(&path);
1757
1758         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1759         key.offset = start;
1760         key.type = BTRFS_EXTENT_CSUM_KEY;
1761
1762         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1763                                 &key, &path, 0, 0);
1764         if (ret < 0)
1765                 goto out;
1766         if (ret > 0 && path.slots[0] > 0) {
1767                 leaf = path.nodes[0];
1768                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1769                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1770                     key.type == BTRFS_EXTENT_CSUM_KEY)
1771                         path.slots[0]--;
1772         }
1773
1774         while (len > 0) {
1775                 leaf = path.nodes[0];
1776                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1777                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1778                         if (ret > 0)
1779                                 break;
1780                         else if (ret < 0)
1781                                 goto out;
1782                         leaf = path.nodes[0];
1783                 }
1784
1785                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1786                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1787                     key.type != BTRFS_EXTENT_CSUM_KEY)
1788                         break;
1789
1790                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1791                 if (key.offset >= start + len)
1792                         break;
1793
1794                 if (key.offset > start)
1795                         start = key.offset;
1796
1797                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1798                 csum_end = key.offset + (size / csum_size) *
1799                            root->fs_info->sectorsize;
1800                 if (csum_end > start) {
1801                         size = min(csum_end - start, len);
1802                         len -= size;
1803                         start += size;
1804                         *found += size;
1805                 }
1806
1807                 path.slots[0]++;
1808         }
1809 out:
1810         btrfs_release_path(&path);
1811         if (ret < 0)
1812                 return ret;
1813         return 0;
1814 }
1815
1816 static int process_file_extent(struct btrfs_root *root,
1817                                 struct extent_buffer *eb,
1818                                 int slot, struct btrfs_key *key,
1819                                 struct shared_node *active_node)
1820 {
1821         struct inode_record *rec;
1822         struct btrfs_file_extent_item *fi;
1823         u64 num_bytes = 0;
1824         u64 disk_bytenr = 0;
1825         u64 extent_offset = 0;
1826         u64 mask = root->fs_info->sectorsize - 1;
1827         int extent_type;
1828         int ret;
1829
1830         rec = active_node->current;
1831         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1832         rec->found_file_extent = 1;
1833
1834         if (rec->extent_start == (u64)-1) {
1835                 rec->extent_start = key->offset;
1836                 rec->extent_end = key->offset;
1837         }
1838
1839         if (rec->extent_end > key->offset)
1840                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1841         else if (rec->extent_end < key->offset) {
1842                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1843                                            key->offset - rec->extent_end);
1844                 if (ret < 0)
1845                         return ret;
1846         }
1847
1848         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1849         extent_type = btrfs_file_extent_type(eb, fi);
1850
1851         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1852                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1853                 if (num_bytes == 0)
1854                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1855                 rec->found_size += num_bytes;
1856                 num_bytes = (num_bytes + mask) & ~mask;
1857         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1858                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1859                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1860                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1861                 extent_offset = btrfs_file_extent_offset(eb, fi);
1862                 if (num_bytes == 0 || (num_bytes & mask))
1863                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1864                 if (num_bytes + extent_offset >
1865                     btrfs_file_extent_ram_bytes(eb, fi))
1866                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1867                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1868                     (btrfs_file_extent_compression(eb, fi) ||
1869                      btrfs_file_extent_encryption(eb, fi) ||
1870                      btrfs_file_extent_other_encoding(eb, fi)))
1871                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1872                 if (disk_bytenr > 0)
1873                         rec->found_size += num_bytes;
1874         } else {
1875                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1876         }
1877         rec->extent_end = key->offset + num_bytes;
1878
1879         /*
1880          * The data reloc tree will copy full extents into its inode and then
1881          * copy the corresponding csums.  Because the extent it copied could be
1882          * a preallocated extent that hasn't been written to yet there may be no
1883          * csums to copy, ergo we won't have csums for our file extent.  This is
1884          * ok so just don't bother checking csums if the inode belongs to the
1885          * data reloc tree.
1886          */
1887         if (disk_bytenr > 0 &&
1888             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1889                 u64 found;
1890                 if (btrfs_file_extent_compression(eb, fi))
1891                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1892                 else
1893                         disk_bytenr += extent_offset;
1894
1895                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1896                 if (ret < 0)
1897                         return ret;
1898                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1899                         if (found > 0)
1900                                 rec->found_csum_item = 1;
1901                         if (found < num_bytes)
1902                                 rec->some_csum_missing = 1;
1903                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1904                         if (found > 0)
1905                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1906                 }
1907         }
1908         return 0;
1909 }
1910
1911 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1912                             struct walk_control *wc)
1913 {
1914         struct btrfs_key key;
1915         u32 nritems;
1916         int i;
1917         int ret = 0;
1918         struct cache_tree *inode_cache;
1919         struct shared_node *active_node;
1920
1921         if (wc->root_level == wc->active_node &&
1922             btrfs_root_refs(&root->root_item) == 0)
1923                 return 0;
1924
1925         active_node = wc->nodes[wc->active_node];
1926         inode_cache = &active_node->inode_cache;
1927         nritems = btrfs_header_nritems(eb);
1928         for (i = 0; i < nritems; i++) {
1929                 btrfs_item_key_to_cpu(eb, &key, i);
1930
1931                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1932                         continue;
1933                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1934                         continue;
1935
1936                 if (active_node->current == NULL ||
1937                     active_node->current->ino < key.objectid) {
1938                         if (active_node->current) {
1939                                 active_node->current->checked = 1;
1940                                 maybe_free_inode_rec(inode_cache,
1941                                                      active_node->current);
1942                         }
1943                         active_node->current = get_inode_rec(inode_cache,
1944                                                              key.objectid, 1);
1945                         BUG_ON(IS_ERR(active_node->current));
1946                 }
1947                 switch (key.type) {
1948                 case BTRFS_DIR_ITEM_KEY:
1949                 case BTRFS_DIR_INDEX_KEY:
1950                         ret = process_dir_item(eb, i, &key, active_node);
1951                         break;
1952                 case BTRFS_INODE_REF_KEY:
1953                         ret = process_inode_ref(eb, i, &key, active_node);
1954                         break;
1955                 case BTRFS_INODE_EXTREF_KEY:
1956                         ret = process_inode_extref(eb, i, &key, active_node);
1957                         break;
1958                 case BTRFS_INODE_ITEM_KEY:
1959                         ret = process_inode_item(eb, i, &key, active_node);
1960                         break;
1961                 case BTRFS_EXTENT_DATA_KEY:
1962                         ret = process_file_extent(root, eb, i, &key,
1963                                                   active_node);
1964                         break;
1965                 default:
1966                         break;
1967                 };
1968         }
1969         return ret;
1970 }
1971
1972 struct node_refs {
1973         u64 bytenr[BTRFS_MAX_LEVEL];
1974         u64 refs[BTRFS_MAX_LEVEL];
1975         int need_check[BTRFS_MAX_LEVEL];
1976 };
1977
1978 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1979                              struct node_refs *nrefs, u64 level);
1980 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1981                             unsigned int ext_ref);
1982
1983 /*
1984  * Returns >0  Found error, not fatal, should continue
1985  * Returns <0  Fatal error, must exit the whole check
1986  * Returns 0   No errors found
1987  */
1988 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1989                                struct node_refs *nrefs, int *level, int ext_ref)
1990 {
1991         struct extent_buffer *cur = path->nodes[0];
1992         struct btrfs_key key;
1993         u64 cur_bytenr;
1994         u32 nritems;
1995         u64 first_ino = 0;
1996         int root_level = btrfs_header_level(root->node);
1997         int i;
1998         int ret = 0; /* Final return value */
1999         int err = 0; /* Positive error bitmap */
2000
2001         cur_bytenr = cur->start;
2002
2003         /* skip to first inode item or the first inode number change */
2004         nritems = btrfs_header_nritems(cur);
2005         for (i = 0; i < nritems; i++) {
2006                 btrfs_item_key_to_cpu(cur, &key, i);
2007                 if (i == 0)
2008                         first_ino = key.objectid;
2009                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2010                     (first_ino && first_ino != key.objectid))
2011                         break;
2012         }
2013         if (i == nritems) {
2014                 path->slots[0] = nritems;
2015                 return 0;
2016         }
2017         path->slots[0] = i;
2018
2019 again:
2020         err |= check_inode_item(root, path, ext_ref);
2021
2022         /* modify cur since check_inode_item may change path */
2023         cur = path->nodes[0];
2024
2025         if (err & LAST_ITEM)
2026                 goto out;
2027
2028         /* still have inode items in thie leaf */
2029         if (cur->start == cur_bytenr)
2030                 goto again;
2031
2032         /*
2033          * we have switched to another leaf, above nodes may
2034          * have changed, here walk down the path, if a node
2035          * or leaf is shared, check whether we can skip this
2036          * node or leaf.
2037          */
2038         for (i = root_level; i >= 0; i--) {
2039                 if (path->nodes[i]->start == nrefs->bytenr[i])
2040                         continue;
2041
2042                 ret = update_nodes_refs(root,
2043                                 path->nodes[i]->start,
2044                                 nrefs, i);
2045                 if (ret)
2046                         goto out;
2047
2048                 if (!nrefs->need_check[i]) {
2049                         *level += 1;
2050                         break;
2051                 }
2052         }
2053
2054         for (i = 0; i < *level; i++) {
2055                 free_extent_buffer(path->nodes[i]);
2056                 path->nodes[i] = NULL;
2057         }
2058 out:
2059         err &= ~LAST_ITEM;
2060         if (err && !ret)
2061                 ret = err;
2062         return ret;
2063 }
2064
2065 static void reada_walk_down(struct btrfs_root *root,
2066                             struct extent_buffer *node, int slot)
2067 {
2068         struct btrfs_fs_info *fs_info = root->fs_info;
2069         u64 bytenr;
2070         u64 ptr_gen;
2071         u32 nritems;
2072         int i;
2073         int level;
2074
2075         level = btrfs_header_level(node);
2076         if (level != 1)
2077                 return;
2078
2079         nritems = btrfs_header_nritems(node);
2080         for (i = slot; i < nritems; i++) {
2081                 bytenr = btrfs_node_blockptr(node, i);
2082                 ptr_gen = btrfs_node_ptr_generation(node, i);
2083                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2084         }
2085 }
2086
2087 /*
2088  * Check the child node/leaf by the following condition:
2089  * 1. the first item key of the node/leaf should be the same with the one
2090  *    in parent.
2091  * 2. block in parent node should match the child node/leaf.
2092  * 3. generation of parent node and child's header should be consistent.
2093  *
2094  * Or the child node/leaf pointed by the key in parent is not valid.
2095  *
2096  * We hope to check leaf owner too, but since subvol may share leaves,
2097  * which makes leaf owner check not so strong, key check should be
2098  * sufficient enough for that case.
2099  */
2100 static int check_child_node(struct extent_buffer *parent, int slot,
2101                             struct extent_buffer *child)
2102 {
2103         struct btrfs_key parent_key;
2104         struct btrfs_key child_key;
2105         int ret = 0;
2106
2107         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2108         if (btrfs_header_level(child) == 0)
2109                 btrfs_item_key_to_cpu(child, &child_key, 0);
2110         else
2111                 btrfs_node_key_to_cpu(child, &child_key, 0);
2112
2113         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2114                 ret = -EINVAL;
2115                 fprintf(stderr,
2116                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2117                         parent_key.objectid, parent_key.type, parent_key.offset,
2118                         child_key.objectid, child_key.type, child_key.offset);
2119         }
2120         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2121                 ret = -EINVAL;
2122                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2123                         btrfs_node_blockptr(parent, slot),
2124                         btrfs_header_bytenr(child));
2125         }
2126         if (btrfs_node_ptr_generation(parent, slot) !=
2127             btrfs_header_generation(child)) {
2128                 ret = -EINVAL;
2129                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2130                         btrfs_header_generation(child),
2131                         btrfs_node_ptr_generation(parent, slot));
2132         }
2133         return ret;
2134 }
2135
2136 /*
2137  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2138  * in every fs or file tree check. Here we find its all root ids, and only check
2139  * it in the fs or file tree which has the smallest root id.
2140  */
2141 static int need_check(struct btrfs_root *root, struct ulist *roots)
2142 {
2143         struct rb_node *node;
2144         struct ulist_node *u;
2145
2146         if (roots->nnodes == 1)
2147                 return 1;
2148
2149         node = rb_first(&roots->root);
2150         u = rb_entry(node, struct ulist_node, rb_node);
2151         /*
2152          * current root id is not smallest, we skip it and let it be checked
2153          * in the fs or file tree who hash the smallest root id.
2154          */
2155         if (root->objectid != u->val)
2156                 return 0;
2157
2158         return 1;
2159 }
2160
2161 /*
2162  * for a tree node or leaf, we record its reference count, so later if we still
2163  * process this node or leaf, don't need to compute its reference count again.
2164  */
2165 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2166                              struct node_refs *nrefs, u64 level)
2167 {
2168         int check, ret;
2169         u64 refs;
2170         struct ulist *roots;
2171
2172         if (nrefs->bytenr[level] != bytenr) {
2173                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2174                                        level, 1, &refs, NULL);
2175                 if (ret < 0)
2176                         return ret;
2177
2178                 nrefs->bytenr[level] = bytenr;
2179                 nrefs->refs[level] = refs;
2180                 if (refs > 1) {
2181                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2182                                                    0, &roots);
2183                         if (ret)
2184                                 return -EIO;
2185
2186                         check = need_check(root, roots);
2187                         ulist_free(roots);
2188                         nrefs->need_check[level] = check;
2189                 } else {
2190                         nrefs->need_check[level] = 1;
2191                 }
2192         }
2193
2194         return 0;
2195 }
2196
2197 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2198                           struct walk_control *wc, int *level,
2199                           struct node_refs *nrefs)
2200 {
2201         enum btrfs_tree_block_status status;
2202         u64 bytenr;
2203         u64 ptr_gen;
2204         struct btrfs_fs_info *fs_info = root->fs_info;
2205         struct extent_buffer *next;
2206         struct extent_buffer *cur;
2207         int ret, err = 0;
2208         u64 refs;
2209
2210         WARN_ON(*level < 0);
2211         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2212
2213         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2214                 refs = nrefs->refs[*level];
2215                 ret = 0;
2216         } else {
2217                 ret = btrfs_lookup_extent_info(NULL, root,
2218                                        path->nodes[*level]->start,
2219                                        *level, 1, &refs, NULL);
2220                 if (ret < 0) {
2221                         err = ret;
2222                         goto out;
2223                 }
2224                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2225                 nrefs->refs[*level] = refs;
2226         }
2227
2228         if (refs > 1) {
2229                 ret = enter_shared_node(root, path->nodes[*level]->start,
2230                                         refs, wc, *level);
2231                 if (ret > 0) {
2232                         err = ret;
2233                         goto out;
2234                 }
2235         }
2236
2237         while (*level >= 0) {
2238                 WARN_ON(*level < 0);
2239                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2240                 cur = path->nodes[*level];
2241
2242                 if (btrfs_header_level(cur) != *level)
2243                         WARN_ON(1);
2244
2245                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2246                         break;
2247                 if (*level == 0) {
2248                         ret = process_one_leaf(root, cur, wc);
2249                         if (ret < 0)
2250                                 err = ret;
2251                         break;
2252                 }
2253                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2254                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2255
2256                 if (bytenr == nrefs->bytenr[*level - 1]) {
2257                         refs = nrefs->refs[*level - 1];
2258                 } else {
2259                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2260                                         *level - 1, 1, &refs, NULL);
2261                         if (ret < 0) {
2262                                 refs = 0;
2263                         } else {
2264                                 nrefs->bytenr[*level - 1] = bytenr;
2265                                 nrefs->refs[*level - 1] = refs;
2266                         }
2267                 }
2268
2269                 if (refs > 1) {
2270                         ret = enter_shared_node(root, bytenr, refs,
2271                                                 wc, *level - 1);
2272                         if (ret > 0) {
2273                                 path->slots[*level]++;
2274                                 continue;
2275                         }
2276                 }
2277
2278                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2279                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2280                         free_extent_buffer(next);
2281                         reada_walk_down(root, cur, path->slots[*level]);
2282                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2283                         if (!extent_buffer_uptodate(next)) {
2284                                 struct btrfs_key node_key;
2285
2286                                 btrfs_node_key_to_cpu(path->nodes[*level],
2287                                                       &node_key,
2288                                                       path->slots[*level]);
2289                                 btrfs_add_corrupt_extent_record(root->fs_info,
2290                                                 &node_key,
2291                                                 path->nodes[*level]->start,
2292                                                 root->fs_info->nodesize,
2293                                                 *level);
2294                                 err = -EIO;
2295                                 goto out;
2296                         }
2297                 }
2298
2299                 ret = check_child_node(cur, path->slots[*level], next);
2300                 if (ret) {
2301                         free_extent_buffer(next);
2302                         err = ret;
2303                         goto out;
2304                 }
2305
2306                 if (btrfs_is_leaf(next))
2307                         status = btrfs_check_leaf(root, NULL, next);
2308                 else
2309                         status = btrfs_check_node(root, NULL, next);
2310                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2311                         free_extent_buffer(next);
2312                         err = -EIO;
2313                         goto out;
2314                 }
2315
2316                 *level = *level - 1;
2317                 free_extent_buffer(path->nodes[*level]);
2318                 path->nodes[*level] = next;
2319                 path->slots[*level] = 0;
2320         }
2321 out:
2322         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2323         return err;
2324 }
2325
2326 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2327                             unsigned int ext_ref);
2328
2329 /*
2330  * Returns >0  Found error, should continue
2331  * Returns <0  Fatal error, must exit the whole check
2332  * Returns 0   No errors found
2333  */
2334 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2335                              int *level, struct node_refs *nrefs, int ext_ref)
2336 {
2337         enum btrfs_tree_block_status status;
2338         u64 bytenr;
2339         u64 ptr_gen;
2340         struct btrfs_fs_info *fs_info = root->fs_info;
2341         struct extent_buffer *next;
2342         struct extent_buffer *cur;
2343         int ret;
2344
2345         WARN_ON(*level < 0);
2346         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2347
2348         ret = update_nodes_refs(root, path->nodes[*level]->start,
2349                                 nrefs, *level);
2350         if (ret < 0)
2351                 return ret;
2352
2353         while (*level >= 0) {
2354                 WARN_ON(*level < 0);
2355                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2356                 cur = path->nodes[*level];
2357
2358                 if (btrfs_header_level(cur) != *level)
2359                         WARN_ON(1);
2360
2361                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2362                         break;
2363                 /* Don't forgot to check leaf/node validation */
2364                 if (*level == 0) {
2365                         ret = btrfs_check_leaf(root, NULL, cur);
2366                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2367                                 ret = -EIO;
2368                                 break;
2369                         }
2370                         ret = process_one_leaf_v2(root, path, nrefs,
2371                                                   level, ext_ref);
2372                         cur = path->nodes[*level];
2373                         break;
2374                 } else {
2375                         ret = btrfs_check_node(root, NULL, cur);
2376                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2377                                 ret = -EIO;
2378                                 break;
2379                         }
2380                 }
2381                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2382                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2383
2384                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2385                 if (ret)
2386                         break;
2387                 if (!nrefs->need_check[*level - 1]) {
2388                         path->slots[*level]++;
2389                         continue;
2390                 }
2391
2392                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2393                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2394                         free_extent_buffer(next);
2395                         reada_walk_down(root, cur, path->slots[*level]);
2396                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2397                         if (!extent_buffer_uptodate(next)) {
2398                                 struct btrfs_key node_key;
2399
2400                                 btrfs_node_key_to_cpu(path->nodes[*level],
2401                                                       &node_key,
2402                                                       path->slots[*level]);
2403                                 btrfs_add_corrupt_extent_record(fs_info,
2404                                                 &node_key,
2405                                                 path->nodes[*level]->start,
2406                                                 fs_info->nodesize,
2407                                                 *level);
2408                                 ret = -EIO;
2409                                 break;
2410                         }
2411                 }
2412
2413                 ret = check_child_node(cur, path->slots[*level], next);
2414                 if (ret < 0) 
2415                         break;
2416
2417                 if (btrfs_is_leaf(next))
2418                         status = btrfs_check_leaf(root, NULL, next);
2419                 else
2420                         status = btrfs_check_node(root, NULL, next);
2421                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2422                         free_extent_buffer(next);
2423                         ret = -EIO;
2424                         break;
2425                 }
2426
2427                 *level = *level - 1;
2428                 free_extent_buffer(path->nodes[*level]);
2429                 path->nodes[*level] = next;
2430                 path->slots[*level] = 0;
2431         }
2432         return ret;
2433 }
2434
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436                         struct walk_control *wc, int *level)
2437 {
2438         int i;
2439         struct extent_buffer *leaf;
2440
2441         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2442                 leaf = path->nodes[i];
2443                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2444                         path->slots[i]++;
2445                         *level = i;
2446                         return 0;
2447                 } else {
2448                         free_extent_buffer(path->nodes[*level]);
2449                         path->nodes[*level] = NULL;
2450                         BUG_ON(*level > wc->active_node);
2451                         if (*level == wc->active_node)
2452                                 leave_shared_node(root, wc, *level);
2453                         *level = i + 1;
2454                 }
2455         }
2456         return 1;
2457 }
2458
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2460                            int *level)
2461 {
2462         int i;
2463         struct extent_buffer *leaf;
2464
2465         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2466                 leaf = path->nodes[i];
2467                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2468                         path->slots[i]++;
2469                         *level = i;
2470                         return 0;
2471                 } else {
2472                         free_extent_buffer(path->nodes[*level]);
2473                         path->nodes[*level] = NULL;
2474                         *level = i + 1;
2475                 }
2476         }
2477         return 1;
2478 }
2479
2480 static int check_root_dir(struct inode_record *rec)
2481 {
2482         struct inode_backref *backref;
2483         int ret = -1;
2484
2485         if (!rec->found_inode_item || rec->errors)
2486                 goto out;
2487         if (rec->nlink != 1 || rec->found_link != 0)
2488                 goto out;
2489         if (list_empty(&rec->backrefs))
2490                 goto out;
2491         backref = to_inode_backref(rec->backrefs.next);
2492         if (!backref->found_inode_ref)
2493                 goto out;
2494         if (backref->index != 0 || backref->namelen != 2 ||
2495             memcmp(backref->name, "..", 2))
2496                 goto out;
2497         if (backref->found_dir_index || backref->found_dir_item)
2498                 goto out;
2499         ret = 0;
2500 out:
2501         return ret;
2502 }
2503
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505                               struct btrfs_root *root, struct btrfs_path *path,
2506                               struct inode_record *rec)
2507 {
2508         struct btrfs_inode_item *ei;
2509         struct btrfs_key key;
2510         int ret;
2511
2512         key.objectid = rec->ino;
2513         key.type = BTRFS_INODE_ITEM_KEY;
2514         key.offset = (u64)-1;
2515
2516         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2517         if (ret < 0)
2518                 goto out;
2519         if (ret) {
2520                 if (!path->slots[0]) {
2521                         ret = -ENOENT;
2522                         goto out;
2523                 }
2524                 path->slots[0]--;
2525                 ret = 0;
2526         }
2527         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2528         if (key.objectid != rec->ino) {
2529                 ret = -ENOENT;
2530                 goto out;
2531         }
2532
2533         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534                             struct btrfs_inode_item);
2535         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536         btrfs_mark_buffer_dirty(path->nodes[0]);
2537         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2539                root->root_key.objectid);
2540 out:
2541         btrfs_release_path(path);
2542         return ret;
2543 }
2544
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546                                     struct btrfs_root *root,
2547                                     struct btrfs_path *path,
2548                                     struct inode_record *rec)
2549 {
2550         int ret;
2551
2552         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553         btrfs_release_path(path);
2554         if (!ret)
2555                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2556         return ret;
2557 }
2558
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560                                struct btrfs_root *root,
2561                                struct btrfs_path *path,
2562                                struct inode_record *rec)
2563 {
2564         struct btrfs_inode_item *ei;
2565         struct btrfs_key key;
2566         int ret = 0;
2567
2568         key.objectid = rec->ino;
2569         key.type = BTRFS_INODE_ITEM_KEY;
2570         key.offset = 0;
2571
2572         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2573         if (ret) {
2574                 if (ret > 0)
2575                         ret = -ENOENT;
2576                 goto out;
2577         }
2578
2579         /* Since ret == 0, no need to check anything */
2580         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581                             struct btrfs_inode_item);
2582         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583         btrfs_mark_buffer_dirty(path->nodes[0]);
2584         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585         printf("reset nbytes for ino %llu root %llu\n",
2586                rec->ino, root->root_key.objectid);
2587 out:
2588         btrfs_release_path(path);
2589         return ret;
2590 }
2591
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593                                  struct cache_tree *inode_cache,
2594                                  struct inode_record *rec,
2595                                  struct inode_backref *backref)
2596 {
2597         struct btrfs_path path;
2598         struct btrfs_trans_handle *trans;
2599         struct btrfs_dir_item *dir_item;
2600         struct extent_buffer *leaf;
2601         struct btrfs_key key;
2602         struct btrfs_disk_key disk_key;
2603         struct inode_record *dir_rec;
2604         unsigned long name_ptr;
2605         u32 data_size = sizeof(*dir_item) + backref->namelen;
2606         int ret;
2607
2608         trans = btrfs_start_transaction(root, 1);
2609         if (IS_ERR(trans))
2610                 return PTR_ERR(trans);
2611
2612         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613                 (unsigned long long)rec->ino);
2614
2615         btrfs_init_path(&path);
2616         key.objectid = backref->dir;
2617         key.type = BTRFS_DIR_INDEX_KEY;
2618         key.offset = backref->index;
2619         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2620         BUG_ON(ret);
2621
2622         leaf = path.nodes[0];
2623         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2624
2625         disk_key.objectid = cpu_to_le64(rec->ino);
2626         disk_key.type = BTRFS_INODE_ITEM_KEY;
2627         disk_key.offset = 0;
2628
2629         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631         btrfs_set_dir_data_len(leaf, dir_item, 0);
2632         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2633         name_ptr = (unsigned long)(dir_item + 1);
2634         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635         btrfs_mark_buffer_dirty(leaf);
2636         btrfs_release_path(&path);
2637         btrfs_commit_transaction(trans, root);
2638
2639         backref->found_dir_index = 1;
2640         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641         BUG_ON(IS_ERR(dir_rec));
2642         if (!dir_rec)
2643                 return 0;
2644         dir_rec->found_size += backref->namelen;
2645         if (dir_rec->found_size == dir_rec->isize &&
2646             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648         if (dir_rec->found_size != dir_rec->isize)
2649                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2650
2651         return 0;
2652 }
2653
2654 static int delete_dir_index(struct btrfs_root *root,
2655                             struct inode_backref *backref)
2656 {
2657         struct btrfs_trans_handle *trans;
2658         struct btrfs_dir_item *di;
2659         struct btrfs_path path;
2660         int ret = 0;
2661
2662         trans = btrfs_start_transaction(root, 1);
2663         if (IS_ERR(trans))
2664                 return PTR_ERR(trans);
2665
2666         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667                 (unsigned long long)backref->dir,
2668                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669                 (unsigned long long)root->objectid);
2670
2671         btrfs_init_path(&path);
2672         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673                                     backref->name, backref->namelen,
2674                                     backref->index, -1);
2675         if (IS_ERR(di)) {
2676                 ret = PTR_ERR(di);
2677                 btrfs_release_path(&path);
2678                 btrfs_commit_transaction(trans, root);
2679                 if (ret == -ENOENT)
2680                         return 0;
2681                 return ret;
2682         }
2683
2684         if (!di)
2685                 ret = btrfs_del_item(trans, root, &path);
2686         else
2687                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2688         BUG_ON(ret);
2689         btrfs_release_path(&path);
2690         btrfs_commit_transaction(trans, root);
2691         return ret;
2692 }
2693
2694 static int create_inode_item(struct btrfs_root *root,
2695                              struct inode_record *rec,
2696                              int root_dir)
2697 {
2698         struct btrfs_trans_handle *trans;
2699         struct btrfs_inode_item inode_item;
2700         time_t now = time(NULL);
2701         int ret;
2702
2703         trans = btrfs_start_transaction(root, 1);
2704         if (IS_ERR(trans)) {
2705                 ret = PTR_ERR(trans);
2706                 return ret;
2707         }
2708
2709         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2710                 "be incomplete, please check permissions and content after "
2711                 "the fsck completes.\n", (unsigned long long)root->objectid,
2712                 (unsigned long long)rec->ino);
2713
2714         memset(&inode_item, 0, sizeof(inode_item));
2715         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2716         if (root_dir)
2717                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2718         else
2719                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2720         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2721         if (rec->found_dir_item) {
2722                 if (rec->found_file_extent)
2723                         fprintf(stderr, "root %llu inode %llu has both a dir "
2724                                 "item and extents, unsure if it is a dir or a "
2725                                 "regular file so setting it as a directory\n",
2726                                 (unsigned long long)root->objectid,
2727                                 (unsigned long long)rec->ino);
2728                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2729                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2730         } else if (!rec->found_dir_item) {
2731                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2732                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2733         }
2734         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2735         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2736         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2737         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2738         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2739         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2740         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2741         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2742
2743         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2744         BUG_ON(ret);
2745         btrfs_commit_transaction(trans, root);
2746         return 0;
2747 }
2748
2749 static int repair_inode_backrefs(struct btrfs_root *root,
2750                                  struct inode_record *rec,
2751                                  struct cache_tree *inode_cache,
2752                                  int delete)
2753 {
2754         struct inode_backref *tmp, *backref;
2755         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2756         int ret = 0;
2757         int repaired = 0;
2758
2759         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2760                 if (!delete && rec->ino == root_dirid) {
2761                         if (!rec->found_inode_item) {
2762                                 ret = create_inode_item(root, rec, 1);
2763                                 if (ret)
2764                                         break;
2765                                 repaired++;
2766                         }
2767                 }
2768
2769                 /* Index 0 for root dir's are special, don't mess with it */
2770                 if (rec->ino == root_dirid && backref->index == 0)
2771                         continue;
2772
2773                 if (delete &&
2774                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2775                      (backref->found_dir_index && backref->found_inode_ref &&
2776                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2777                         ret = delete_dir_index(root, backref);
2778                         if (ret)
2779                                 break;
2780                         repaired++;
2781                         list_del(&backref->list);
2782                         free(backref);
2783                         continue;
2784                 }
2785
2786                 if (!delete && !backref->found_dir_index &&
2787                     backref->found_dir_item && backref->found_inode_ref) {
2788                         ret = add_missing_dir_index(root, inode_cache, rec,
2789                                                     backref);
2790                         if (ret)
2791                                 break;
2792                         repaired++;
2793                         if (backref->found_dir_item &&
2794                             backref->found_dir_index) {
2795                                 if (!backref->errors &&
2796                                     backref->found_inode_ref) {
2797                                         list_del(&backref->list);
2798                                         free(backref);
2799                                         continue;
2800                                 }
2801                         }
2802                 }
2803
2804                 if (!delete && (!backref->found_dir_index &&
2805                                 !backref->found_dir_item &&
2806                                 backref->found_inode_ref)) {
2807                         struct btrfs_trans_handle *trans;
2808                         struct btrfs_key location;
2809
2810                         ret = check_dir_conflict(root, backref->name,
2811                                                  backref->namelen,
2812                                                  backref->dir,
2813                                                  backref->index);
2814                         if (ret) {
2815                                 /*
2816                                  * let nlink fixing routine to handle it,
2817                                  * which can do it better.
2818                                  */
2819                                 ret = 0;
2820                                 break;
2821                         }
2822                         location.objectid = rec->ino;
2823                         location.type = BTRFS_INODE_ITEM_KEY;
2824                         location.offset = 0;
2825
2826                         trans = btrfs_start_transaction(root, 1);
2827                         if (IS_ERR(trans)) {
2828                                 ret = PTR_ERR(trans);
2829                                 break;
2830                         }
2831                         fprintf(stderr, "adding missing dir index/item pair "
2832                                 "for inode %llu\n",
2833                                 (unsigned long long)rec->ino);
2834                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2835                                                     backref->namelen,
2836                                                     backref->dir, &location,
2837                                                     imode_to_type(rec->imode),
2838                                                     backref->index);
2839                         BUG_ON(ret);
2840                         btrfs_commit_transaction(trans, root);
2841                         repaired++;
2842                 }
2843
2844                 if (!delete && (backref->found_inode_ref &&
2845                                 backref->found_dir_index &&
2846                                 backref->found_dir_item &&
2847                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2848                                 !rec->found_inode_item)) {
2849                         ret = create_inode_item(root, rec, 0);
2850                         if (ret)
2851                                 break;
2852                         repaired++;
2853                 }
2854
2855         }
2856         return ret ? ret : repaired;
2857 }
2858
2859 /*
2860  * To determine the file type for nlink/inode_item repair
2861  *
2862  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2863  * Return -ENOENT if file type is not found.
2864  */
2865 static int find_file_type(struct inode_record *rec, u8 *type)
2866 {
2867         struct inode_backref *backref;
2868
2869         /* For inode item recovered case */
2870         if (rec->found_inode_item) {
2871                 *type = imode_to_type(rec->imode);
2872                 return 0;
2873         }
2874
2875         list_for_each_entry(backref, &rec->backrefs, list) {
2876                 if (backref->found_dir_index || backref->found_dir_item) {
2877                         *type = backref->filetype;
2878                         return 0;
2879                 }
2880         }
2881         return -ENOENT;
2882 }
2883
2884 /*
2885  * To determine the file name for nlink repair
2886  *
2887  * Return 0 if file name is found, set name and namelen.
2888  * Return -ENOENT if file name is not found.
2889  */
2890 static int find_file_name(struct inode_record *rec,
2891                           char *name, int *namelen)
2892 {
2893         struct inode_backref *backref;
2894
2895         list_for_each_entry(backref, &rec->backrefs, list) {
2896                 if (backref->found_dir_index || backref->found_dir_item ||
2897                     backref->found_inode_ref) {
2898                         memcpy(name, backref->name, backref->namelen);
2899                         *namelen = backref->namelen;
2900                         return 0;
2901                 }
2902         }
2903         return -ENOENT;
2904 }
2905
2906 /* Reset the nlink of the inode to the correct one */
2907 static int reset_nlink(struct btrfs_trans_handle *trans,
2908                        struct btrfs_root *root,
2909                        struct btrfs_path *path,
2910                        struct inode_record *rec)
2911 {
2912         struct inode_backref *backref;
2913         struct inode_backref *tmp;
2914         struct btrfs_key key;
2915         struct btrfs_inode_item *inode_item;
2916         int ret = 0;
2917
2918         /* We don't believe this either, reset it and iterate backref */
2919         rec->found_link = 0;
2920
2921         /* Remove all backref including the valid ones */
2922         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2923                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2924                                    backref->index, backref->name,
2925                                    backref->namelen, 0);
2926                 if (ret < 0)
2927                         goto out;
2928
2929                 /* remove invalid backref, so it won't be added back */
2930                 if (!(backref->found_dir_index &&
2931                       backref->found_dir_item &&
2932                       backref->found_inode_ref)) {
2933                         list_del(&backref->list);
2934                         free(backref);
2935                 } else {
2936                         rec->found_link++;
2937                 }
2938         }
2939
2940         /* Set nlink to 0 */
2941         key.objectid = rec->ino;
2942         key.type = BTRFS_INODE_ITEM_KEY;
2943         key.offset = 0;
2944         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2945         if (ret < 0)
2946                 goto out;
2947         if (ret > 0) {
2948                 ret = -ENOENT;
2949                 goto out;
2950         }
2951         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2952                                     struct btrfs_inode_item);
2953         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2954         btrfs_mark_buffer_dirty(path->nodes[0]);
2955         btrfs_release_path(path);
2956
2957         /*
2958          * Add back valid inode_ref/dir_item/dir_index,
2959          * add_link() will handle the nlink inc, so new nlink must be correct
2960          */
2961         list_for_each_entry(backref, &rec->backrefs, list) {
2962                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2963                                      backref->name, backref->namelen,
2964                                      backref->filetype, &backref->index, 1);
2965                 if (ret < 0)
2966                         goto out;
2967         }
2968 out:
2969         btrfs_release_path(path);
2970         return ret;
2971 }
2972
2973 static int get_highest_inode(struct btrfs_trans_handle *trans,
2974                                 struct btrfs_root *root,
2975                                 struct btrfs_path *path,
2976                                 u64 *highest_ino)
2977 {
2978         struct btrfs_key key, found_key;
2979         int ret;
2980
2981         btrfs_init_path(path);
2982         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2983         key.offset = -1;
2984         key.type = BTRFS_INODE_ITEM_KEY;
2985         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2986         if (ret == 1) {
2987                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2988                                 path->slots[0] - 1);
2989                 *highest_ino = found_key.objectid;
2990                 ret = 0;
2991         }
2992         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2993                 ret = -EOVERFLOW;
2994         btrfs_release_path(path);
2995         return ret;
2996 }
2997
2998 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2999                                struct btrfs_root *root,
3000                                struct btrfs_path *path,
3001                                struct inode_record *rec)
3002 {
3003         char *dir_name = "lost+found";
3004         char namebuf[BTRFS_NAME_LEN] = {0};
3005         u64 lost_found_ino;
3006         u32 mode = 0700;
3007         u8 type = 0;
3008         int namelen = 0;
3009         int name_recovered = 0;
3010         int type_recovered = 0;
3011         int ret = 0;
3012
3013         /*
3014          * Get file name and type first before these invalid inode ref
3015          * are deleted by remove_all_invalid_backref()
3016          */
3017         name_recovered = !find_file_name(rec, namebuf, &namelen);
3018         type_recovered = !find_file_type(rec, &type);
3019
3020         if (!name_recovered) {
3021                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3022                        rec->ino, rec->ino);
3023                 namelen = count_digits(rec->ino);
3024                 sprintf(namebuf, "%llu", rec->ino);
3025                 name_recovered = 1;
3026         }
3027         if (!type_recovered) {
3028                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3029                        rec->ino);
3030                 type = BTRFS_FT_REG_FILE;
3031                 type_recovered = 1;
3032         }
3033
3034         ret = reset_nlink(trans, root, path, rec);
3035         if (ret < 0) {
3036                 fprintf(stderr,
3037                         "Failed to reset nlink for inode %llu: %s\n",
3038                         rec->ino, strerror(-ret));
3039                 goto out;
3040         }
3041
3042         if (rec->found_link == 0) {
3043                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3044                 if (ret < 0)
3045                         goto out;
3046                 lost_found_ino++;
3047                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3048                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3049                                   mode);
3050                 if (ret < 0) {
3051                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
3052                                 dir_name, strerror(-ret));
3053                         goto out;
3054                 }
3055                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3056                                      namebuf, namelen, type, NULL, 1);
3057                 /*
3058                  * Add ".INO" suffix several times to handle case where
3059                  * "FILENAME.INO" is already taken by another file.
3060                  */
3061                 while (ret == -EEXIST) {
3062                         /*
3063                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
3064                          */
3065                         if (namelen + count_digits(rec->ino) + 1 >
3066                             BTRFS_NAME_LEN) {
3067                                 ret = -EFBIG;
3068                                 goto out;
3069                         }
3070                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3071                                  ".%llu", rec->ino);
3072                         namelen += count_digits(rec->ino) + 1;
3073                         ret = btrfs_add_link(trans, root, rec->ino,
3074                                              lost_found_ino, namebuf,
3075                                              namelen, type, NULL, 1);
3076                 }
3077                 if (ret < 0) {
3078                         fprintf(stderr,
3079                                 "Failed to link the inode %llu to %s dir: %s\n",
3080                                 rec->ino, dir_name, strerror(-ret));
3081                         goto out;
3082                 }
3083                 /*
3084                  * Just increase the found_link, don't actually add the
3085                  * backref. This will make things easier and this inode
3086                  * record will be freed after the repair is done.
3087                  * So fsck will not report problem about this inode.
3088                  */
3089                 rec->found_link++;
3090                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3091                        namelen, namebuf, dir_name);
3092         }
3093         printf("Fixed the nlink of inode %llu\n", rec->ino);
3094 out:
3095         /*
3096          * Clear the flag anyway, or we will loop forever for the same inode
3097          * as it will not be removed from the bad inode list and the dead loop
3098          * happens.
3099          */
3100         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3101         btrfs_release_path(path);
3102         return ret;
3103 }
3104
3105 /*
3106  * Check if there is any normal(reg or prealloc) file extent for given
3107  * ino.
3108  * This is used to determine the file type when neither its dir_index/item or
3109  * inode_item exists.
3110  *
3111  * This will *NOT* report error, if any error happens, just consider it does
3112  * not have any normal file extent.
3113  */
3114 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3115 {
3116         struct btrfs_path path;
3117         struct btrfs_key key;
3118         struct btrfs_key found_key;
3119         struct btrfs_file_extent_item *fi;
3120         u8 type;
3121         int ret = 0;
3122
3123         btrfs_init_path(&path);
3124         key.objectid = ino;
3125         key.type = BTRFS_EXTENT_DATA_KEY;
3126         key.offset = 0;
3127
3128         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3129         if (ret < 0) {
3130                 ret = 0;
3131                 goto out;
3132         }
3133         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3134                 ret = btrfs_next_leaf(root, &path);
3135                 if (ret) {
3136                         ret = 0;
3137                         goto out;
3138                 }
3139         }
3140         while (1) {
3141                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3142                                       path.slots[0]);
3143                 if (found_key.objectid != ino ||
3144                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3145                         break;
3146                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3147                                     struct btrfs_file_extent_item);
3148                 type = btrfs_file_extent_type(path.nodes[0], fi);
3149                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3150                         ret = 1;
3151                         goto out;
3152                 }
3153         }
3154 out:
3155         btrfs_release_path(&path);
3156         return ret;
3157 }
3158
3159 static u32 btrfs_type_to_imode(u8 type)
3160 {
3161         static u32 imode_by_btrfs_type[] = {
3162                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3163                 [BTRFS_FT_DIR]          = S_IFDIR,
3164                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3165                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3166                 [BTRFS_FT_FIFO]         = S_IFIFO,
3167                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3168                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3169         };
3170
3171         return imode_by_btrfs_type[(type)];
3172 }
3173
3174 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3175                                 struct btrfs_root *root,
3176                                 struct btrfs_path *path,
3177                                 struct inode_record *rec)
3178 {
3179         u8 filetype;
3180         u32 mode = 0700;
3181         int type_recovered = 0;
3182         int ret = 0;
3183
3184         printf("Trying to rebuild inode:%llu\n", rec->ino);
3185
3186         type_recovered = !find_file_type(rec, &filetype);
3187
3188         /*
3189          * Try to determine inode type if type not found.
3190          *
3191          * For found regular file extent, it must be FILE.
3192          * For found dir_item/index, it must be DIR.
3193          *
3194          * For undetermined one, use FILE as fallback.
3195          *
3196          * TODO:
3197          * 1. If found backref(inode_index/item is already handled) to it,
3198          *    it must be DIR.
3199          *    Need new inode-inode ref structure to allow search for that.
3200          */
3201         if (!type_recovered) {
3202                 if (rec->found_file_extent &&
3203                     find_normal_file_extent(root, rec->ino)) {
3204                         type_recovered = 1;
3205                         filetype = BTRFS_FT_REG_FILE;
3206                 } else if (rec->found_dir_item) {
3207                         type_recovered = 1;
3208                         filetype = BTRFS_FT_DIR;
3209                 } else if (!list_empty(&rec->orphan_extents)) {
3210                         type_recovered = 1;
3211                         filetype = BTRFS_FT_REG_FILE;
3212                 } else{
3213                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3214                                rec->ino);
3215                         type_recovered = 1;
3216                         filetype = BTRFS_FT_REG_FILE;
3217                 }
3218         }
3219
3220         ret = btrfs_new_inode(trans, root, rec->ino,
3221                               mode | btrfs_type_to_imode(filetype));
3222         if (ret < 0)
3223                 goto out;
3224
3225         /*
3226          * Here inode rebuild is done, we only rebuild the inode item,
3227          * don't repair the nlink(like move to lost+found).
3228          * That is the job of nlink repair.
3229          *
3230          * We just fill the record and return
3231          */
3232         rec->found_dir_item = 1;
3233         rec->imode = mode | btrfs_type_to_imode(filetype);
3234         rec->nlink = 0;
3235         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3236         /* Ensure the inode_nlinks repair function will be called */
3237         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3238 out:
3239         return ret;
3240 }
3241
3242 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3243                                       struct btrfs_root *root,
3244                                       struct btrfs_path *path,
3245                                       struct inode_record *rec)
3246 {
3247         struct orphan_data_extent *orphan;
3248         struct orphan_data_extent *tmp;
3249         int ret = 0;
3250
3251         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3252                 /*
3253                  * Check for conflicting file extents
3254                  *
3255                  * Here we don't know whether the extents is compressed or not,
3256                  * so we can only assume it not compressed nor data offset,
3257                  * and use its disk_len as extent length.
3258                  */
3259                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3260                                        orphan->offset, orphan->disk_len, 0);
3261                 btrfs_release_path(path);
3262                 if (ret < 0)
3263                         goto out;
3264                 if (!ret) {
3265                         fprintf(stderr,
3266                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3267                                 orphan->disk_bytenr, orphan->disk_len);
3268                         ret = btrfs_free_extent(trans,
3269                                         root->fs_info->extent_root,
3270                                         orphan->disk_bytenr, orphan->disk_len,
3271                                         0, root->objectid, orphan->objectid,
3272                                         orphan->offset);
3273                         if (ret < 0)
3274                                 goto out;
3275                 }
3276                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3277                                 orphan->offset, orphan->disk_bytenr,
3278                                 orphan->disk_len, orphan->disk_len);
3279                 if (ret < 0)
3280                         goto out;
3281
3282                 /* Update file size info */
3283                 rec->found_size += orphan->disk_len;
3284                 if (rec->found_size == rec->nbytes)
3285                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3286
3287                 /* Update the file extent hole info too */
3288                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3289                                            orphan->disk_len);
3290                 if (ret < 0)
3291                         goto out;
3292                 if (RB_EMPTY_ROOT(&rec->holes))
3293                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3294
3295                 list_del(&orphan->list);
3296                 free(orphan);
3297         }
3298         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3299 out:
3300         return ret;
3301 }
3302
3303 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3304                                         struct btrfs_root *root,
3305                                         struct btrfs_path *path,
3306                                         struct inode_record *rec)
3307 {
3308         struct rb_node *node;
3309         struct file_extent_hole *hole;
3310         int found = 0;
3311         int ret = 0;
3312
3313         node = rb_first(&rec->holes);
3314
3315         while (node) {
3316                 found = 1;
3317                 hole = rb_entry(node, struct file_extent_hole, node);
3318                 ret = btrfs_punch_hole(trans, root, rec->ino,
3319                                        hole->start, hole->len);
3320                 if (ret < 0)
3321                         goto out;
3322                 ret = del_file_extent_hole(&rec->holes, hole->start,
3323                                            hole->len);
3324                 if (ret < 0)
3325                         goto out;
3326                 if (RB_EMPTY_ROOT(&rec->holes))
3327                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3328                 node = rb_first(&rec->holes);
3329         }
3330         /* special case for a file losing all its file extent */
3331         if (!found) {
3332                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3333                                        round_up(rec->isize,
3334                                                 root->fs_info->sectorsize));
3335                 if (ret < 0)
3336                         goto out;
3337         }
3338         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3339                rec->ino, root->objectid);
3340 out:
3341         return ret;
3342 }
3343
3344 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3345 {
3346         struct btrfs_trans_handle *trans;
3347         struct btrfs_path path;
3348         int ret = 0;
3349
3350         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3351                              I_ERR_NO_ORPHAN_ITEM |
3352                              I_ERR_LINK_COUNT_WRONG |
3353                              I_ERR_NO_INODE_ITEM |
3354                              I_ERR_FILE_EXTENT_ORPHAN |
3355                              I_ERR_FILE_EXTENT_DISCOUNT|
3356                              I_ERR_FILE_NBYTES_WRONG)))
3357                 return rec->errors;
3358
3359         /*
3360          * For nlink repair, it may create a dir and add link, so
3361          * 2 for parent(256)'s dir_index and dir_item
3362          * 2 for lost+found dir's inode_item and inode_ref
3363          * 1 for the new inode_ref of the file
3364          * 2 for lost+found dir's dir_index and dir_item for the file
3365          */
3366         trans = btrfs_start_transaction(root, 7);
3367         if (IS_ERR(trans))
3368                 return PTR_ERR(trans);
3369
3370         btrfs_init_path(&path);
3371         if (rec->errors & I_ERR_NO_INODE_ITEM)
3372                 ret = repair_inode_no_item(trans, root, &path, rec);
3373         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3374                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3375         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3376                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3377         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3378                 ret = repair_inode_isize(trans, root, &path, rec);
3379         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3380                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3381         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3382                 ret = repair_inode_nlinks(trans, root, &path, rec);
3383         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3384                 ret = repair_inode_nbytes(trans, root, &path, rec);
3385         btrfs_commit_transaction(trans, root);
3386         btrfs_release_path(&path);
3387         return ret;
3388 }
3389
3390 static int check_inode_recs(struct btrfs_root *root,
3391                             struct cache_tree *inode_cache)
3392 {
3393         struct cache_extent *cache;
3394         struct ptr_node *node;
3395         struct inode_record *rec;
3396         struct inode_backref *backref;
3397         int stage = 0;
3398         int ret = 0;
3399         int err = 0;
3400         u64 error = 0;
3401         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3402
3403         if (btrfs_root_refs(&root->root_item) == 0) {
3404                 if (!cache_tree_empty(inode_cache))
3405                         fprintf(stderr, "warning line %d\n", __LINE__);
3406                 return 0;
3407         }
3408
3409         /*
3410          * We need to repair backrefs first because we could change some of the
3411          * errors in the inode recs.
3412          *
3413          * We also need to go through and delete invalid backrefs first and then
3414          * add the correct ones second.  We do this because we may get EEXIST
3415          * when adding back the correct index because we hadn't yet deleted the
3416          * invalid index.
3417          *
3418          * For example, if we were missing a dir index then the directories
3419          * isize would be wrong, so if we fixed the isize to what we thought it
3420          * would be and then fixed the backref we'd still have a invalid fs, so
3421          * we need to add back the dir index and then check to see if the isize
3422          * is still wrong.
3423          */
3424         while (stage < 3) {
3425                 stage++;
3426                 if (stage == 3 && !err)
3427                         break;
3428
3429                 cache = search_cache_extent(inode_cache, 0);
3430                 while (repair && cache) {
3431                         node = container_of(cache, struct ptr_node, cache);
3432                         rec = node->data;
3433                         cache = next_cache_extent(cache);
3434
3435                         /* Need to free everything up and rescan */
3436                         if (stage == 3) {
3437                                 remove_cache_extent(inode_cache, &node->cache);
3438                                 free(node);
3439                                 free_inode_rec(rec);
3440                                 continue;
3441                         }
3442
3443                         if (list_empty(&rec->backrefs))
3444                                 continue;
3445
3446                         ret = repair_inode_backrefs(root, rec, inode_cache,
3447                                                     stage == 1);
3448                         if (ret < 0) {
3449                                 err = ret;
3450                                 stage = 2;
3451                                 break;
3452                         } if (ret > 0) {
3453                                 err = -EAGAIN;
3454                         }
3455                 }
3456         }
3457         if (err)
3458                 return err;
3459
3460         rec = get_inode_rec(inode_cache, root_dirid, 0);
3461         BUG_ON(IS_ERR(rec));
3462         if (rec) {
3463                 ret = check_root_dir(rec);
3464                 if (ret) {
3465                         fprintf(stderr, "root %llu root dir %llu error\n",
3466                                 (unsigned long long)root->root_key.objectid,
3467                                 (unsigned long long)root_dirid);
3468                         print_inode_error(root, rec);
3469                         error++;
3470                 }
3471         } else {
3472                 if (repair) {
3473                         struct btrfs_trans_handle *trans;
3474
3475                         trans = btrfs_start_transaction(root, 1);
3476                         if (IS_ERR(trans)) {
3477                                 err = PTR_ERR(trans);
3478                                 return err;
3479                         }
3480
3481                         fprintf(stderr,
3482                                 "root %llu missing its root dir, recreating\n",
3483                                 (unsigned long long)root->objectid);
3484
3485                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3486                         BUG_ON(ret);
3487
3488                         btrfs_commit_transaction(trans, root);
3489                         return -EAGAIN;
3490                 }
3491
3492                 fprintf(stderr, "root %llu root dir %llu not found\n",
3493                         (unsigned long long)root->root_key.objectid,
3494                         (unsigned long long)root_dirid);
3495         }
3496
3497         while (1) {
3498                 cache = search_cache_extent(inode_cache, 0);
3499                 if (!cache)
3500                         break;
3501                 node = container_of(cache, struct ptr_node, cache);
3502                 rec = node->data;
3503                 remove_cache_extent(inode_cache, &node->cache);
3504                 free(node);
3505                 if (rec->ino == root_dirid ||
3506                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3507                         free_inode_rec(rec);
3508                         continue;
3509                 }
3510
3511                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3512                         ret = check_orphan_item(root, rec->ino);
3513                         if (ret == 0)
3514                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3515                         if (can_free_inode_rec(rec)) {
3516                                 free_inode_rec(rec);
3517                                 continue;
3518                         }
3519                 }
3520
3521                 if (!rec->found_inode_item)
3522                         rec->errors |= I_ERR_NO_INODE_ITEM;
3523                 if (rec->found_link != rec->nlink)
3524                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3525                 if (repair) {
3526                         ret = try_repair_inode(root, rec);
3527                         if (ret == 0 && can_free_inode_rec(rec)) {
3528                                 free_inode_rec(rec);
3529                                 continue;
3530                         }
3531                         ret = 0;
3532                 }
3533
3534                 if (!(repair && ret == 0))
3535                         error++;
3536                 print_inode_error(root, rec);
3537                 list_for_each_entry(backref, &rec->backrefs, list) {
3538                         if (!backref->found_dir_item)
3539                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3540                         if (!backref->found_dir_index)
3541                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3542                         if (!backref->found_inode_ref)
3543                                 backref->errors |= REF_ERR_NO_INODE_REF;
3544                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3545                                 " namelen %u name %s filetype %d errors %x",
3546                                 (unsigned long long)backref->dir,
3547                                 (unsigned long long)backref->index,
3548                                 backref->namelen, backref->name,
3549                                 backref->filetype, backref->errors);
3550                         print_ref_error(backref->errors);
3551                 }
3552                 free_inode_rec(rec);
3553         }
3554         return (error > 0) ? -1 : 0;
3555 }
3556
3557 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3558                                         u64 objectid)
3559 {
3560         struct cache_extent *cache;
3561         struct root_record *rec = NULL;
3562         int ret;
3563
3564         cache = lookup_cache_extent(root_cache, objectid, 1);
3565         if (cache) {
3566                 rec = container_of(cache, struct root_record, cache);
3567         } else {
3568                 rec = calloc(1, sizeof(*rec));
3569                 if (!rec)
3570                         return ERR_PTR(-ENOMEM);
3571                 rec->objectid = objectid;
3572                 INIT_LIST_HEAD(&rec->backrefs);
3573                 rec->cache.start = objectid;
3574                 rec->cache.size = 1;
3575
3576                 ret = insert_cache_extent(root_cache, &rec->cache);
3577                 if (ret)
3578                         return ERR_PTR(-EEXIST);
3579         }
3580         return rec;
3581 }
3582
3583 static struct root_backref *get_root_backref(struct root_record *rec,
3584                                              u64 ref_root, u64 dir, u64 index,
3585                                              const char *name, int namelen)
3586 {
3587         struct root_backref *backref;
3588
3589         list_for_each_entry(backref, &rec->backrefs, list) {
3590                 if (backref->ref_root != ref_root || backref->dir != dir ||
3591                     backref->namelen != namelen)
3592                         continue;
3593                 if (memcmp(name, backref->name, namelen))
3594                         continue;
3595                 return backref;
3596         }
3597
3598         backref = calloc(1, sizeof(*backref) + namelen + 1);
3599         if (!backref)
3600                 return NULL;
3601         backref->ref_root = ref_root;
3602         backref->dir = dir;
3603         backref->index = index;
3604         backref->namelen = namelen;
3605         memcpy(backref->name, name, namelen);
3606         backref->name[namelen] = '\0';
3607         list_add_tail(&backref->list, &rec->backrefs);
3608         return backref;
3609 }
3610
3611 static void free_root_record(struct cache_extent *cache)
3612 {
3613         struct root_record *rec;
3614         struct root_backref *backref;
3615
3616         rec = container_of(cache, struct root_record, cache);
3617         while (!list_empty(&rec->backrefs)) {
3618                 backref = to_root_backref(rec->backrefs.next);
3619                 list_del(&backref->list);
3620                 free(backref);
3621         }
3622
3623         free(rec);
3624 }
3625
3626 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3627
3628 static int add_root_backref(struct cache_tree *root_cache,
3629                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3630                             const char *name, int namelen,
3631                             int item_type, int errors)
3632 {
3633         struct root_record *rec;
3634         struct root_backref *backref;
3635
3636         rec = get_root_rec(root_cache, root_id);
3637         BUG_ON(IS_ERR(rec));
3638         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3639         BUG_ON(!backref);
3640
3641         backref->errors |= errors;
3642
3643         if (item_type != BTRFS_DIR_ITEM_KEY) {
3644                 if (backref->found_dir_index || backref->found_back_ref ||
3645                     backref->found_forward_ref) {
3646                         if (backref->index != index)
3647                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3648                 } else {
3649                         backref->index = index;
3650                 }
3651         }
3652
3653         if (item_type == BTRFS_DIR_ITEM_KEY) {
3654                 if (backref->found_forward_ref)
3655                         rec->found_ref++;
3656                 backref->found_dir_item = 1;
3657         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3658                 backref->found_dir_index = 1;
3659         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3660                 if (backref->found_forward_ref)
3661                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3662                 else if (backref->found_dir_item)
3663                         rec->found_ref++;
3664                 backref->found_forward_ref = 1;
3665         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3666                 if (backref->found_back_ref)
3667                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3668                 backref->found_back_ref = 1;
3669         } else {
3670                 BUG_ON(1);
3671         }
3672
3673         if (backref->found_forward_ref && backref->found_dir_item)
3674                 backref->reachable = 1;
3675         return 0;
3676 }
3677
3678 static int merge_root_recs(struct btrfs_root *root,
3679                            struct cache_tree *src_cache,
3680                            struct cache_tree *dst_cache)
3681 {
3682         struct cache_extent *cache;
3683         struct ptr_node *node;
3684         struct inode_record *rec;
3685         struct inode_backref *backref;
3686         int ret = 0;
3687
3688         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3689                 free_inode_recs_tree(src_cache);
3690                 return 0;
3691         }
3692
3693         while (1) {
3694                 cache = search_cache_extent(src_cache, 0);
3695                 if (!cache)
3696                         break;
3697                 node = container_of(cache, struct ptr_node, cache);
3698                 rec = node->data;
3699                 remove_cache_extent(src_cache, &node->cache);
3700                 free(node);
3701
3702                 ret = is_child_root(root, root->objectid, rec->ino);
3703                 if (ret < 0)
3704                         break;
3705                 else if (ret == 0)
3706                         goto skip;
3707
3708                 list_for_each_entry(backref, &rec->backrefs, list) {
3709                         BUG_ON(backref->found_inode_ref);
3710                         if (backref->found_dir_item)
3711                                 add_root_backref(dst_cache, rec->ino,
3712                                         root->root_key.objectid, backref->dir,
3713                                         backref->index, backref->name,
3714                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3715                                         backref->errors);
3716                         if (backref->found_dir_index)
3717                                 add_root_backref(dst_cache, rec->ino,
3718                                         root->root_key.objectid, backref->dir,
3719                                         backref->index, backref->name,
3720                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3721                                         backref->errors);
3722                 }
3723 skip:
3724                 free_inode_rec(rec);
3725         }
3726         if (ret < 0)
3727                 return ret;
3728         return 0;
3729 }
3730
3731 static int check_root_refs(struct btrfs_root *root,
3732                            struct cache_tree *root_cache)
3733 {
3734         struct root_record *rec;
3735         struct root_record *ref_root;
3736         struct root_backref *backref;
3737         struct cache_extent *cache;
3738         int loop = 1;
3739         int ret;
3740         int error;
3741         int errors = 0;
3742
3743         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3744         BUG_ON(IS_ERR(rec));
3745         rec->found_ref = 1;
3746
3747         /* fixme: this can not detect circular references */
3748         while (loop) {
3749                 loop = 0;
3750                 cache = search_cache_extent(root_cache, 0);
3751                 while (1) {
3752                         if (!cache)
3753                                 break;
3754                         rec = container_of(cache, struct root_record, cache);
3755                         cache = next_cache_extent(cache);
3756
3757                         if (rec->found_ref == 0)
3758                                 continue;
3759
3760                         list_for_each_entry(backref, &rec->backrefs, list) {
3761                                 if (!backref->reachable)
3762                                         continue;
3763
3764                                 ref_root = get_root_rec(root_cache,
3765                                                         backref->ref_root);
3766                                 BUG_ON(IS_ERR(ref_root));
3767                                 if (ref_root->found_ref > 0)
3768                                         continue;
3769
3770                                 backref->reachable = 0;
3771                                 rec->found_ref--;
3772                                 if (rec->found_ref == 0)
3773                                         loop = 1;
3774                         }
3775                 }
3776         }
3777
3778         cache = search_cache_extent(root_cache, 0);
3779         while (1) {
3780                 if (!cache)
3781                         break;
3782                 rec = container_of(cache, struct root_record, cache);
3783                 cache = next_cache_extent(cache);
3784
3785                 if (rec->found_ref == 0 &&
3786                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3787                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3788                         ret = check_orphan_item(root->fs_info->tree_root,
3789                                                 rec->objectid);
3790                         if (ret == 0)
3791                                 continue;
3792
3793                         /*
3794                          * If we don't have a root item then we likely just have
3795                          * a dir item in a snapshot for this root but no actual
3796                          * ref key or anything so it's meaningless.
3797                          */
3798                         if (!rec->found_root_item)
3799                                 continue;
3800                         errors++;
3801                         fprintf(stderr, "fs tree %llu not referenced\n",
3802                                 (unsigned long long)rec->objectid);
3803                 }
3804
3805                 error = 0;
3806                 if (rec->found_ref > 0 && !rec->found_root_item)
3807                         error = 1;
3808                 list_for_each_entry(backref, &rec->backrefs, list) {
3809                         if (!backref->found_dir_item)
3810                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3811                         if (!backref->found_dir_index)
3812                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3813                         if (!backref->found_back_ref)
3814                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3815                         if (!backref->found_forward_ref)
3816                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3817                         if (backref->reachable && backref->errors)
3818                                 error = 1;
3819                 }
3820                 if (!error)
3821                         continue;
3822
3823                 errors++;
3824                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3825                         (unsigned long long)rec->objectid, rec->found_ref,
3826                          rec->found_root_item ? "" : "not found");
3827
3828                 list_for_each_entry(backref, &rec->backrefs, list) {
3829                         if (!backref->reachable)
3830                                 continue;
3831                         if (!backref->errors && rec->found_root_item)
3832                                 continue;
3833                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3834                                 " index %llu namelen %u name %s errors %x\n",
3835                                 (unsigned long long)backref->ref_root,
3836                                 (unsigned long long)backref->dir,
3837                                 (unsigned long long)backref->index,
3838                                 backref->namelen, backref->name,
3839                                 backref->errors);
3840                         print_ref_error(backref->errors);
3841                 }
3842         }
3843         return errors > 0 ? 1 : 0;
3844 }
3845
3846 static int process_root_ref(struct extent_buffer *eb, int slot,
3847                             struct btrfs_key *key,
3848                             struct cache_tree *root_cache)
3849 {
3850         u64 dirid;
3851         u64 index;
3852         u32 len;
3853         u32 name_len;
3854         struct btrfs_root_ref *ref;
3855         char namebuf[BTRFS_NAME_LEN];
3856         int error;
3857
3858         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3859
3860         dirid = btrfs_root_ref_dirid(eb, ref);
3861         index = btrfs_root_ref_sequence(eb, ref);
3862         name_len = btrfs_root_ref_name_len(eb, ref);
3863
3864         if (name_len <= BTRFS_NAME_LEN) {
3865                 len = name_len;
3866                 error = 0;
3867         } else {
3868                 len = BTRFS_NAME_LEN;
3869                 error = REF_ERR_NAME_TOO_LONG;
3870         }
3871         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3872
3873         if (key->type == BTRFS_ROOT_REF_KEY) {
3874                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3875                                  index, namebuf, len, key->type, error);
3876         } else {
3877                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3878                                  index, namebuf, len, key->type, error);
3879         }
3880         return 0;
3881 }
3882
3883 static void free_corrupt_block(struct cache_extent *cache)
3884 {
3885         struct btrfs_corrupt_block *corrupt;
3886
3887         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3888         free(corrupt);
3889 }
3890
3891 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3892
3893 /*
3894  * Repair the btree of the given root.
3895  *
3896  * The fix is to remove the node key in corrupt_blocks cache_tree.
3897  * and rebalance the tree.
3898  * After the fix, the btree should be writeable.
3899  */
3900 static int repair_btree(struct btrfs_root *root,
3901                         struct cache_tree *corrupt_blocks)
3902 {
3903         struct btrfs_trans_handle *trans;
3904         struct btrfs_path path;
3905         struct btrfs_corrupt_block *corrupt;
3906         struct cache_extent *cache;
3907         struct btrfs_key key;
3908         u64 offset;
3909         int level;
3910         int ret = 0;
3911
3912         if (cache_tree_empty(corrupt_blocks))
3913                 return 0;
3914
3915         trans = btrfs_start_transaction(root, 1);
3916         if (IS_ERR(trans)) {
3917                 ret = PTR_ERR(trans);
3918                 fprintf(stderr, "Error starting transaction: %s\n",
3919                         strerror(-ret));
3920                 return ret;
3921         }
3922         btrfs_init_path(&path);
3923         cache = first_cache_extent(corrupt_blocks);
3924         while (cache) {
3925                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3926                                        cache);
3927                 level = corrupt->level;
3928                 path.lowest_level = level;
3929                 key.objectid = corrupt->key.objectid;
3930                 key.type = corrupt->key.type;
3931                 key.offset = corrupt->key.offset;
3932
3933                 /*
3934                  * Here we don't want to do any tree balance, since it may
3935                  * cause a balance with corrupted brother leaf/node,
3936                  * so ins_len set to 0 here.
3937                  * Balance will be done after all corrupt node/leaf is deleted.
3938                  */
3939                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3940                 if (ret < 0)
3941                         goto out;
3942                 offset = btrfs_node_blockptr(path.nodes[level],
3943                                              path.slots[level]);
3944
3945                 /* Remove the ptr */
3946                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3947                 if (ret < 0)
3948                         goto out;
3949                 /*
3950                  * Remove the corresponding extent
3951                  * return value is not concerned.
3952                  */
3953                 btrfs_release_path(&path);
3954                 ret = btrfs_free_extent(trans, root, offset,
3955                                 root->fs_info->nodesize, 0,
3956                                 root->root_key.objectid, level - 1, 0);
3957                 cache = next_cache_extent(cache);
3958         }
3959
3960         /* Balance the btree using btrfs_search_slot() */
3961         cache = first_cache_extent(corrupt_blocks);
3962         while (cache) {
3963                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3964                                        cache);
3965                 memcpy(&key, &corrupt->key, sizeof(key));
3966                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3967                 if (ret < 0)
3968                         goto out;
3969                 /* return will always >0 since it won't find the item */
3970                 ret = 0;
3971                 btrfs_release_path(&path);
3972                 cache = next_cache_extent(cache);
3973         }
3974 out:
3975         btrfs_commit_transaction(trans, root);
3976         btrfs_release_path(&path);
3977         return ret;
3978 }
3979
3980 static int check_fs_root(struct btrfs_root *root,
3981                          struct cache_tree *root_cache,
3982                          struct walk_control *wc)
3983 {
3984         int ret = 0;
3985         int err = 0;
3986         int wret;
3987         int level;
3988         struct btrfs_path path;
3989         struct shared_node root_node;
3990         struct root_record *rec;
3991         struct btrfs_root_item *root_item = &root->root_item;
3992         struct cache_tree corrupt_blocks;
3993         struct orphan_data_extent *orphan;
3994         struct orphan_data_extent *tmp;
3995         enum btrfs_tree_block_status status;
3996         struct node_refs nrefs;
3997
3998         /*
3999          * Reuse the corrupt_block cache tree to record corrupted tree block
4000          *
4001          * Unlike the usage in extent tree check, here we do it in a per
4002          * fs/subvol tree base.
4003          */
4004         cache_tree_init(&corrupt_blocks);
4005         root->fs_info->corrupt_blocks = &corrupt_blocks;
4006
4007         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4008                 rec = get_root_rec(root_cache, root->root_key.objectid);
4009                 BUG_ON(IS_ERR(rec));
4010                 if (btrfs_root_refs(root_item) > 0)
4011                         rec->found_root_item = 1;
4012         }
4013
4014         btrfs_init_path(&path);
4015         memset(&root_node, 0, sizeof(root_node));
4016         cache_tree_init(&root_node.root_cache);
4017         cache_tree_init(&root_node.inode_cache);
4018         memset(&nrefs, 0, sizeof(nrefs));
4019
4020         /* Move the orphan extent record to corresponding inode_record */
4021         list_for_each_entry_safe(orphan, tmp,
4022                                  &root->orphan_data_extents, list) {
4023                 struct inode_record *inode;
4024
4025                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4026                                       1);
4027                 BUG_ON(IS_ERR(inode));
4028                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4029                 list_move(&orphan->list, &inode->orphan_extents);
4030         }
4031
4032         level = btrfs_header_level(root->node);
4033         memset(wc->nodes, 0, sizeof(wc->nodes));
4034         wc->nodes[level] = &root_node;
4035         wc->active_node = level;
4036         wc->root_level = level;
4037
4038         /* We may not have checked the root block, lets do that now */
4039         if (btrfs_is_leaf(root->node))
4040                 status = btrfs_check_leaf(root, NULL, root->node);
4041         else
4042                 status = btrfs_check_node(root, NULL, root->node);
4043         if (status != BTRFS_TREE_BLOCK_CLEAN)
4044                 return -EIO;
4045
4046         if (btrfs_root_refs(root_item) > 0 ||
4047             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4048                 path.nodes[level] = root->node;
4049                 extent_buffer_get(root->node);
4050                 path.slots[level] = 0;
4051         } else {
4052                 struct btrfs_key key;
4053                 struct btrfs_disk_key found_key;
4054
4055                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4056                 level = root_item->drop_level;
4057                 path.lowest_level = level;
4058                 if (level > btrfs_header_level(root->node) ||
4059                     level >= BTRFS_MAX_LEVEL) {
4060                         error("ignoring invalid drop level: %u", level);
4061                         goto skip_walking;
4062                 }
4063                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4064                 if (wret < 0)
4065                         goto skip_walking;
4066                 btrfs_node_key(path.nodes[level], &found_key,
4067                                 path.slots[level]);
4068                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4069                                         sizeof(found_key)));
4070         }
4071
4072         while (1) {
4073                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4074                 if (wret < 0)
4075                         ret = wret;
4076                 if (wret != 0)
4077                         break;
4078
4079                 wret = walk_up_tree(root, &path, wc, &level);
4080                 if (wret < 0)
4081                         ret = wret;
4082                 if (wret != 0)
4083                         break;
4084         }
4085 skip_walking:
4086         btrfs_release_path(&path);
4087
4088         if (!cache_tree_empty(&corrupt_blocks)) {
4089                 struct cache_extent *cache;
4090                 struct btrfs_corrupt_block *corrupt;
4091
4092                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4093                        root->root_key.objectid);
4094                 cache = first_cache_extent(&corrupt_blocks);
4095                 while (cache) {
4096                         corrupt = container_of(cache,
4097                                                struct btrfs_corrupt_block,
4098                                                cache);
4099                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4100                                cache->start, corrupt->level,
4101                                corrupt->key.objectid, corrupt->key.type,
4102                                corrupt->key.offset);
4103                         cache = next_cache_extent(cache);
4104                 }
4105                 if (repair) {
4106                         printf("Try to repair the btree for root %llu\n",
4107                                root->root_key.objectid);
4108                         ret = repair_btree(root, &corrupt_blocks);
4109                         if (ret < 0)
4110                                 fprintf(stderr, "Failed to repair btree: %s\n",
4111                                         strerror(-ret));
4112                         if (!ret)
4113                                 printf("Btree for root %llu is fixed\n",
4114                                        root->root_key.objectid);
4115                 }
4116         }
4117
4118         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4119         if (err < 0)
4120                 ret = err;
4121
4122         if (root_node.current) {
4123                 root_node.current->checked = 1;
4124                 maybe_free_inode_rec(&root_node.inode_cache,
4125                                 root_node.current);
4126         }
4127
4128         err = check_inode_recs(root, &root_node.inode_cache);
4129         if (!ret)
4130                 ret = err;
4131
4132         free_corrupt_blocks_tree(&corrupt_blocks);
4133         root->fs_info->corrupt_blocks = NULL;
4134         free_orphan_data_extents(&root->orphan_data_extents);
4135         return ret;
4136 }
4137
4138 static int fs_root_objectid(u64 objectid)
4139 {
4140         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4141             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4142                 return 1;
4143         return is_fstree(objectid);
4144 }
4145
4146 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4147                           struct cache_tree *root_cache)
4148 {
4149         struct btrfs_path path;
4150         struct btrfs_key key;
4151         struct walk_control wc;
4152         struct extent_buffer *leaf, *tree_node;
4153         struct btrfs_root *tmp_root;
4154         struct btrfs_root *tree_root = fs_info->tree_root;
4155         int ret;
4156         int err = 0;
4157
4158         if (ctx.progress_enabled) {
4159                 ctx.tp = TASK_FS_ROOTS;
4160                 task_start(ctx.info);
4161         }
4162
4163         /*
4164          * Just in case we made any changes to the extent tree that weren't
4165          * reflected into the free space cache yet.
4166          */
4167         if (repair)
4168                 reset_cached_block_groups(fs_info);
4169         memset(&wc, 0, sizeof(wc));
4170         cache_tree_init(&wc.shared);
4171         btrfs_init_path(&path);
4172
4173 again:
4174         key.offset = 0;
4175         key.objectid = 0;
4176         key.type = BTRFS_ROOT_ITEM_KEY;
4177         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4178         if (ret < 0) {
4179                 err = 1;
4180                 goto out;
4181         }
4182         tree_node = tree_root->node;
4183         while (1) {
4184                 if (tree_node != tree_root->node) {
4185                         free_root_recs_tree(root_cache);
4186                         btrfs_release_path(&path);
4187                         goto again;
4188                 }
4189                 leaf = path.nodes[0];
4190                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4191                         ret = btrfs_next_leaf(tree_root, &path);
4192                         if (ret) {
4193                                 if (ret < 0)
4194                                         err = 1;
4195                                 break;
4196                         }
4197                         leaf = path.nodes[0];
4198                 }
4199                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4200                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4201                     fs_root_objectid(key.objectid)) {
4202                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4203                                 tmp_root = btrfs_read_fs_root_no_cache(
4204                                                 fs_info, &key);
4205                         } else {
4206                                 key.offset = (u64)-1;
4207                                 tmp_root = btrfs_read_fs_root(
4208                                                 fs_info, &key);
4209                         }
4210                         if (IS_ERR(tmp_root)) {
4211                                 err = 1;
4212                                 goto next;
4213                         }
4214                         ret = check_fs_root(tmp_root, root_cache, &wc);
4215                         if (ret == -EAGAIN) {
4216                                 free_root_recs_tree(root_cache);
4217                                 btrfs_release_path(&path);
4218                                 goto again;
4219                         }
4220                         if (ret)
4221                                 err = 1;
4222                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4223                                 btrfs_free_fs_root(tmp_root);
4224                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4225                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4226                         process_root_ref(leaf, path.slots[0], &key,
4227                                          root_cache);
4228                 }
4229 next:
4230                 path.slots[0]++;
4231         }
4232 out:
4233         btrfs_release_path(&path);
4234         if (err)
4235                 free_extent_cache_tree(&wc.shared);
4236         if (!cache_tree_empty(&wc.shared))
4237                 fprintf(stderr, "warning line %d\n", __LINE__);
4238
4239         task_stop(ctx.info);
4240
4241         return err;
4242 }
4243
4244 /*
4245  * Find the @index according by @ino and name.
4246  * Notice:time efficiency is O(N)
4247  *
4248  * @root:       the root of the fs/file tree
4249  * @index_ret:  the index as return value
4250  * @namebuf:    the name to match
4251  * @name_len:   the length of name to match
4252  * @file_type:  the file_type of INODE_ITEM to match
4253  *
4254  * Returns 0 if found and *@index_ret will be modified with right value
4255  * Returns< 0 not found and *@index_ret will be (u64)-1
4256  */
4257 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4258                           u64 *index_ret, char *namebuf, u32 name_len,
4259                           u8 file_type)
4260 {
4261         struct btrfs_path path;
4262         struct extent_buffer *node;
4263         struct btrfs_dir_item *di;
4264         struct btrfs_key key;
4265         struct btrfs_key location;
4266         char name[BTRFS_NAME_LEN] = {0};
4267
4268         u32 total;
4269         u32 cur = 0;
4270         u32 len;
4271         u32 data_len;
4272         u8 filetype;
4273         int slot;
4274         int ret;
4275
4276         ASSERT(index_ret);
4277
4278         /* search from the last index */
4279         key.objectid = dirid;
4280         key.offset = (u64)-1;
4281         key.type = BTRFS_DIR_INDEX_KEY;
4282
4283         btrfs_init_path(&path);
4284         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4285         if (ret < 0)
4286                 return ret;
4287
4288 loop:
4289         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4290         if (ret) {
4291                 ret = -ENOENT;
4292                 *index_ret = (64)-1;
4293                 goto out;
4294         }
4295         /* Check whether inode_id/filetype/name match */
4296         node = path.nodes[0];
4297         slot = path.slots[0];
4298         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4299         total = btrfs_item_size_nr(node, slot);
4300         while (cur < total) {
4301                 ret = -ENOENT;
4302                 len = btrfs_dir_name_len(node, di);
4303                 data_len = btrfs_dir_data_len(node, di);
4304
4305                 btrfs_dir_item_key_to_cpu(node, di, &location);
4306                 if (location.objectid != location_id ||
4307                     location.type != BTRFS_INODE_ITEM_KEY ||
4308                     location.offset != 0)
4309                         goto next;
4310
4311                 filetype = btrfs_dir_type(node, di);
4312                 if (file_type != filetype)
4313                         goto next;
4314
4315                 if (len > BTRFS_NAME_LEN)
4316                         len = BTRFS_NAME_LEN;
4317
4318                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4319                 if (len != name_len || strncmp(namebuf, name, len))
4320                         goto next;
4321
4322                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4323                 *index_ret = key.offset;
4324                 ret = 0;
4325                 goto out;
4326 next:
4327                 len += sizeof(*di) + data_len;
4328                 di = (struct btrfs_dir_item *)((char *)di + len);
4329                 cur += len;
4330         }
4331         goto loop;
4332
4333 out:
4334         btrfs_release_path(&path);
4335         return ret;
4336 }
4337
4338 /*
4339  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4340  * INODE_REF/INODE_EXTREF match.
4341  *
4342  * @root:       the root of the fs/file tree
4343  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4344  *              value while find index
4345  * @location_key: location key of the struct btrfs_dir_item to match
4346  * @name:       the name to match
4347  * @namelen:    the length of name
4348  * @file_type:  the type of file to math
4349  *
4350  * Return 0 if no error occurred.
4351  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4352  * DIR_ITEM/DIR_INDEX
4353  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4354  * and DIR_ITEM/DIR_INDEX mismatch
4355  */
4356 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4357                          struct btrfs_key *location_key, char *name,
4358                          u32 namelen, u8 file_type)
4359 {
4360         struct btrfs_path path;
4361         struct extent_buffer *node;
4362         struct btrfs_dir_item *di;
4363         struct btrfs_key location;
4364         char namebuf[BTRFS_NAME_LEN] = {0};
4365         u32 total;
4366         u32 cur = 0;
4367         u32 len;
4368         u32 data_len;
4369         u8 filetype;
4370         int slot;
4371         int ret;
4372
4373         /* get the index by traversing all index */
4374         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4375                 ret = find_dir_index(root, key->objectid,
4376                                      location_key->objectid, &key->offset,
4377                                      name, namelen, file_type);
4378                 if (ret)
4379                         ret = DIR_INDEX_MISSING;
4380                 return ret;
4381         }
4382
4383         btrfs_init_path(&path);
4384         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4385         if (ret) {
4386                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4387                         DIR_INDEX_MISSING;
4388                 goto out;
4389         }
4390
4391         /* Check whether inode_id/filetype/name match */
4392         node = path.nodes[0];
4393         slot = path.slots[0];
4394         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4395         total = btrfs_item_size_nr(node, slot);
4396         while (cur < total) {
4397                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4398                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4399
4400                 len = btrfs_dir_name_len(node, di);
4401                 data_len = btrfs_dir_data_len(node, di);
4402
4403                 btrfs_dir_item_key_to_cpu(node, di, &location);
4404                 if (location.objectid != location_key->objectid ||
4405                     location.type != location_key->type ||
4406                     location.offset != location_key->offset)
4407                         goto next;
4408
4409                 filetype = btrfs_dir_type(node, di);
4410                 if (file_type != filetype)
4411                         goto next;
4412
4413                 if (len > BTRFS_NAME_LEN) {
4414                         len = BTRFS_NAME_LEN;
4415                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4416                         root->objectid,
4417                         key->type == BTRFS_DIR_ITEM_KEY ?
4418                         "DIR_ITEM" : "DIR_INDEX",
4419                         key->objectid, key->offset, len);
4420                 }
4421                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4422                                    len);
4423                 if (len != namelen || strncmp(namebuf, name, len))
4424                         goto next;
4425
4426                 ret = 0;
4427                 goto out;
4428 next:
4429                 len += sizeof(*di) + data_len;
4430                 di = (struct btrfs_dir_item *)((char *)di + len);
4431                 cur += len;
4432         }
4433
4434 out:
4435         btrfs_release_path(&path);
4436         return ret;
4437 }
4438
4439 /*
4440  * Prints inode ref error message
4441  */
4442 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4443                                 u64 index, const char *namebuf, int name_len,
4444                                 u8 filetype, int err)
4445 {
4446         if (!err)
4447                 return;
4448
4449         /* root dir error */
4450         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4451                 error(
4452         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4453                       root->objectid, key->objectid, key->offset, namebuf);
4454                 return;
4455         }
4456
4457         /* normal error */
4458         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4459                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4460                       root->objectid, key->offset,
4461                       btrfs_name_hash(namebuf, name_len),
4462                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4463                       namebuf, filetype);
4464         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4465                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4466                       root->objectid, key->offset, index,
4467                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4468                       namebuf, filetype);
4469 }
4470
4471 /*
4472  * Traverse the given INODE_REF and call find_dir_item() to find related
4473  * DIR_ITEM/DIR_INDEX.
4474  *
4475  * @root:       the root of the fs/file tree
4476  * @ref_key:    the key of the INODE_REF
4477  * @refs:       the count of INODE_REF
4478  * @mode:       the st_mode of INODE_ITEM
4479  *
4480  * Return 0 if no error occurred.
4481  */
4482 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4483                            struct btrfs_path *path, char *name_ret,
4484                            u32 *namelen_ret, u64 *refs, int mode)
4485 {
4486         struct btrfs_key key;
4487         struct btrfs_key location;
4488         struct btrfs_inode_ref *ref;
4489         struct extent_buffer *node;
4490         char namebuf[BTRFS_NAME_LEN] = {0};
4491         u32 total;
4492         u32 cur = 0;
4493         u32 len;
4494         u32 name_len;
4495         u64 index;
4496         int err = 0;
4497         int tmp_err;
4498         int slot;
4499
4500         location.objectid = ref_key->objectid;
4501         location.type = BTRFS_INODE_ITEM_KEY;
4502         location.offset = 0;
4503         node = path->nodes[0];
4504         slot = path->slots[0];
4505
4506         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4507         total = btrfs_item_size_nr(node, slot);
4508
4509 next:
4510         /* Update inode ref count */
4511         (*refs)++;
4512
4513         tmp_err = 0;
4514         index = btrfs_inode_ref_index(node, ref);
4515         name_len = btrfs_inode_ref_name_len(node, ref);
4516         if (cur + sizeof(*ref) + name_len > total ||
4517             name_len > BTRFS_NAME_LEN) {
4518                 warning("root %llu INODE_REF[%llu %llu] name too long",
4519                         root->objectid, ref_key->objectid, ref_key->offset);
4520
4521                 if (total < cur + sizeof(*ref))
4522                         goto out;
4523                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4524         } else {
4525                 len = name_len;
4526         }
4527
4528         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4529
4530         /* copy the fisrt name found to name_ret */
4531         if (*refs == 1 && name_ret) {
4532                 memcpy(name_ret, namebuf, len);
4533                 *namelen_ret = len;
4534         }
4535
4536         /* Check root dir ref */
4537         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4538                 if (index != 0 || len != strlen("..") ||
4539                     strncmp("..", namebuf, len) ||
4540                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4541                         /* set err bits then repair will delete the ref */
4542                         err |= DIR_INDEX_MISSING;
4543                         err |= DIR_ITEM_MISSING;
4544                 }
4545                 goto end;
4546         }
4547
4548         /* Find related DIR_INDEX */
4549         key.objectid = ref_key->offset;
4550         key.type = BTRFS_DIR_INDEX_KEY;
4551         key.offset = index;
4552         tmp_err |= find_dir_item(root, &key, &location, namebuf, len, mode);
4553
4554         /* Find related dir_item */
4555         key.objectid = ref_key->offset;
4556         key.type = BTRFS_DIR_ITEM_KEY;
4557         key.offset = btrfs_name_hash(namebuf, len);
4558         tmp_err |= find_dir_item(root, &key, &location, namebuf, len, mode);
4559
4560 end:
4561         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4562                             imode_to_type(mode), tmp_err);
4563         err |= tmp_err;
4564         len = sizeof(*ref) + name_len;
4565         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4566         cur += len;
4567         if (cur < total)
4568                 goto next;
4569
4570 out:
4571         return err;
4572 }
4573
4574 /*
4575  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4576  * DIR_ITEM/DIR_INDEX.
4577  *
4578  * @root:       the root of the fs/file tree
4579  * @ref_key:    the key of the INODE_EXTREF
4580  * @refs:       the count of INODE_EXTREF
4581  * @mode:       the st_mode of INODE_ITEM
4582  *
4583  * Return 0 if no error occurred.
4584  */
4585 static int check_inode_extref(struct btrfs_root *root,
4586                               struct btrfs_key *ref_key,
4587                               struct extent_buffer *node, int slot, u64 *refs,
4588                               int mode)
4589 {
4590         struct btrfs_key key;
4591         struct btrfs_key location;
4592         struct btrfs_inode_extref *extref;
4593         char namebuf[BTRFS_NAME_LEN] = {0};
4594         u32 total;
4595         u32 cur = 0;
4596         u32 len;
4597         u32 name_len;
4598         u64 index;
4599         u64 parent;
4600         int ret;
4601         int err = 0;
4602
4603         location.objectid = ref_key->objectid;
4604         location.type = BTRFS_INODE_ITEM_KEY;
4605         location.offset = 0;
4606
4607         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4608         total = btrfs_item_size_nr(node, slot);
4609
4610 next:
4611         /* update inode ref count */
4612         (*refs)++;
4613         name_len = btrfs_inode_extref_name_len(node, extref);
4614         index = btrfs_inode_extref_index(node, extref);
4615         parent = btrfs_inode_extref_parent(node, extref);
4616         if (name_len <= BTRFS_NAME_LEN) {
4617                 len = name_len;
4618         } else {
4619                 len = BTRFS_NAME_LEN;
4620                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4621                         root->objectid, ref_key->objectid, ref_key->offset);
4622         }
4623         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4624
4625         /* Check root dir ref name */
4626         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4627                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4628                       root->objectid, ref_key->objectid, ref_key->offset,
4629                       namebuf);
4630                 err |= ROOT_DIR_ERROR;
4631         }
4632
4633         /* find related dir_index */
4634         key.objectid = parent;
4635         key.type = BTRFS_DIR_INDEX_KEY;
4636         key.offset = index;
4637         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4638         err |= ret;
4639
4640         /* find related dir_item */
4641         key.objectid = parent;
4642         key.type = BTRFS_DIR_ITEM_KEY;
4643         key.offset = btrfs_name_hash(namebuf, len);
4644         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4645         err |= ret;
4646
4647         len = sizeof(*extref) + name_len;
4648         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4649         cur += len;
4650
4651         if (cur < total)
4652                 goto next;
4653
4654         return err;
4655 }
4656
4657 /*
4658  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4659  * DIR_ITEM/DIR_INDEX match.
4660  * Return with @index_ret.
4661  *
4662  * @root:       the root of the fs/file tree
4663  * @key:        the key of the INODE_REF/INODE_EXTREF
4664  * @name:       the name in the INODE_REF/INODE_EXTREF
4665  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4666  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4667  *              value (64)-1 means do not check index
4668  * @ext_ref:    the EXTENDED_IREF feature
4669  *
4670  * Return 0 if no error occurred.
4671  * Return >0 for error bitmap
4672  */
4673 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4674                           char *name, int namelen, u64 *index_ret,
4675                           unsigned int ext_ref)
4676 {
4677         struct btrfs_path path;
4678         struct btrfs_inode_ref *ref;
4679         struct btrfs_inode_extref *extref;
4680         struct extent_buffer *node;
4681         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4682         u32 total;
4683         u32 cur = 0;
4684         u32 len;
4685         u32 ref_namelen;
4686         u64 ref_index;
4687         u64 parent;
4688         u64 dir_id;
4689         int slot;
4690         int ret;
4691
4692         ASSERT(index_ret);
4693
4694         btrfs_init_path(&path);
4695         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4696         if (ret) {
4697                 ret = INODE_REF_MISSING;
4698                 goto extref;
4699         }
4700
4701         node = path.nodes[0];
4702         slot = path.slots[0];
4703
4704         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4705         total = btrfs_item_size_nr(node, slot);
4706
4707         /* Iterate all entry of INODE_REF */
4708         while (cur < total) {
4709                 ret = INODE_REF_MISSING;
4710
4711                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4712                 ref_index = btrfs_inode_ref_index(node, ref);
4713                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4714                         goto next_ref;
4715
4716                 if (cur + sizeof(*ref) + ref_namelen > total ||
4717                     ref_namelen > BTRFS_NAME_LEN) {
4718                         warning("root %llu INODE %s[%llu %llu] name too long",
4719                                 root->objectid,
4720                                 key->type == BTRFS_INODE_REF_KEY ?
4721                                         "REF" : "EXTREF",
4722                                 key->objectid, key->offset);
4723
4724                         if (cur + sizeof(*ref) > total)
4725                                 break;
4726                         len = min_t(u32, total - cur - sizeof(*ref),
4727                                     BTRFS_NAME_LEN);
4728                 } else {
4729                         len = ref_namelen;
4730                 }
4731
4732                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4733                                    len);
4734
4735                 if (len != namelen || strncmp(ref_namebuf, name, len))
4736                         goto next_ref;
4737
4738                 *index_ret = ref_index;
4739                 ret = 0;
4740                 goto out;
4741 next_ref:
4742                 len = sizeof(*ref) + ref_namelen;
4743                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4744                 cur += len;
4745         }
4746
4747 extref:
4748         /* Skip if not support EXTENDED_IREF feature */
4749         if (!ext_ref)
4750                 goto out;
4751
4752         btrfs_release_path(&path);
4753         btrfs_init_path(&path);
4754
4755         dir_id = key->offset;
4756         key->type = BTRFS_INODE_EXTREF_KEY;
4757         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4758
4759         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4760         if (ret) {
4761                 ret = INODE_REF_MISSING;
4762                 goto out;
4763         }
4764
4765         node = path.nodes[0];
4766         slot = path.slots[0];
4767
4768         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4769         cur = 0;
4770         total = btrfs_item_size_nr(node, slot);
4771
4772         /* Iterate all entry of INODE_EXTREF */
4773         while (cur < total) {
4774                 ret = INODE_REF_MISSING;
4775
4776                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4777                 ref_index = btrfs_inode_extref_index(node, extref);
4778                 parent = btrfs_inode_extref_parent(node, extref);
4779                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4780                         goto next_extref;
4781
4782                 if (parent != dir_id)
4783                         goto next_extref;
4784
4785                 if (ref_namelen <= BTRFS_NAME_LEN) {
4786                         len = ref_namelen;
4787                 } else {
4788                         len = BTRFS_NAME_LEN;
4789                         warning("root %llu INODE %s[%llu %llu] name too long",
4790                                 root->objectid,
4791                                 key->type == BTRFS_INODE_REF_KEY ?
4792                                         "REF" : "EXTREF",
4793                                 key->objectid, key->offset);
4794                 }
4795                 read_extent_buffer(node, ref_namebuf,
4796                                    (unsigned long)(extref + 1), len);
4797
4798                 if (len != namelen || strncmp(ref_namebuf, name, len))
4799                         goto next_extref;
4800
4801                 *index_ret = ref_index;
4802                 ret = 0;
4803                 goto out;
4804
4805 next_extref:
4806                 len = sizeof(*extref) + ref_namelen;
4807                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4808                 cur += len;
4809
4810         }
4811 out:
4812         btrfs_release_path(&path);
4813         return ret;
4814 }
4815
4816 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4817                                u64 ino, u64 index, const char *namebuf,
4818                                int name_len, u8 filetype, int err)
4819 {
4820         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4821                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4822                       root->objectid, key->objectid, key->offset, namebuf,
4823                       filetype,
4824                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4825         }
4826
4827         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4828                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4829                       root->objectid, key->objectid, index, namebuf, filetype,
4830                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4831         }
4832
4833         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4834                 error(
4835                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4836                       root->objectid, ino, index, namebuf, filetype,
4837                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
4838         }
4839
4840         if (err & INODE_REF_MISSING)
4841                 error(
4842                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
4843                       root->objectid, ino, key->objectid, namebuf, filetype);
4844
4845 }
4846
4847 /*
4848  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4849  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4850  *
4851  * @root:       the root of the fs/file tree
4852  * @key:        the key of the INODE_REF/INODE_EXTREF
4853  * @path:       the path
4854  * @size:       the st_size of the INODE_ITEM
4855  * @ext_ref:    the EXTENDED_IREF feature
4856  *
4857  * Return 0 if no error occurred.
4858  */
4859 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
4860                           struct btrfs_path *path, u64 *size,
4861                           unsigned int ext_ref)
4862 {
4863         struct btrfs_dir_item *di;
4864         struct btrfs_inode_item *ii;
4865         struct btrfs_key key;
4866         struct btrfs_key location;
4867         struct extent_buffer *node;
4868         int slot;
4869         char namebuf[BTRFS_NAME_LEN] = {0};
4870         u32 total;
4871         u32 cur = 0;
4872         u32 len;
4873         u32 name_len;
4874         u32 data_len;
4875         u8 filetype;
4876         u32 mode;
4877         u64 index;
4878         int ret;
4879         int err = 0;
4880         int tmp_err;
4881
4882         node = path->nodes[0];
4883         slot = path->slots[0];
4884         /*
4885          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4886          * ignore index check.
4887          */
4888         if (di_key->type == BTRFS_DIR_INDEX_KEY)
4889                 index = di_key->offset;
4890         else
4891                 index = (u64)-1;
4892
4893         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4894         total = btrfs_item_size_nr(node, slot);
4895         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4896
4897         while (cur < total) {
4898                 data_len = btrfs_dir_data_len(node, di);
4899                 tmp_err = 0;
4900                 if (data_len)
4901                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4902                               root->objectid,
4903               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
4904                               di_key->objectid, di_key->offset, data_len);
4905
4906                 name_len = btrfs_dir_name_len(node, di);
4907                 if (name_len <= BTRFS_NAME_LEN) {
4908                         len = name_len;
4909                 } else {
4910                         len = BTRFS_NAME_LEN;
4911                         warning("root %llu %s[%llu %llu] name too long",
4912                                 root->objectid,
4913                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
4914                                 di_key->objectid, di_key->offset);
4915                 }
4916                 (*size) += name_len;
4917                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4918                                    len);
4919                 filetype = btrfs_dir_type(node, di);
4920
4921                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
4922                     di_key->offset != btrfs_name_hash(namebuf, len)) {
4923                         err |= -EIO;
4924                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4925                         root->objectid, di_key->objectid, di_key->offset,
4926                         namebuf, len, filetype, di_key->offset,
4927                         btrfs_name_hash(namebuf, len));
4928                 }
4929
4930                 btrfs_dir_item_key_to_cpu(node, di, &location);
4931                 /* Ignore related ROOT_ITEM check */
4932                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4933                         goto next;
4934
4935                 btrfs_release_path(path);
4936                 /* Check relative INODE_ITEM(existence/filetype) */
4937                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
4938                 if (ret) {
4939                         tmp_err |= INODE_ITEM_MISSING;
4940                         goto next;
4941                 }
4942
4943                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
4944                                     struct btrfs_inode_item);
4945                 mode = btrfs_inode_mode(path->nodes[0], ii);
4946                 if (imode_to_type(mode) != filetype) {
4947                         tmp_err |= INODE_ITEM_MISMATCH;
4948                         goto next;
4949                 }
4950
4951                 /* Check relative INODE_REF/INODE_EXTREF */
4952                 key.objectid = location.objectid;
4953                 key.type = BTRFS_INODE_REF_KEY;
4954                 key.offset = di_key->objectid;
4955                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
4956                                           &index, ext_ref);
4957
4958                 /* check relative INDEX/ITEM */
4959                 key.objectid = di_key->objectid;
4960                 if (key.type == BTRFS_DIR_ITEM_KEY) {
4961                         key.type = BTRFS_DIR_INDEX_KEY;
4962                         key.offset = index;
4963                 } else {
4964                         key.type = BTRFS_DIR_ITEM_KEY;
4965                         key.offset = btrfs_name_hash(namebuf, name_len);
4966                 }
4967
4968                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
4969                                          name_len, filetype);
4970                 /* find_dir_item may find index */
4971                 if (key.type == BTRFS_DIR_INDEX_KEY)
4972                         index = key.offset;
4973 next:
4974                 btrfs_release_path(path);
4975                 print_dir_item_err(root, di_key, location.objectid, index,
4976                                    namebuf, name_len, filetype, tmp_err);
4977                 err |= tmp_err;
4978                 len = sizeof(*di) + name_len + data_len;
4979                 di = (struct btrfs_dir_item *)((char *)di + len);
4980                 cur += len;
4981
4982                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4983                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4984                               root->objectid, di_key->objectid,
4985                               di_key->offset);
4986                         break;
4987                 }
4988         }
4989
4990         /* research path */
4991         btrfs_release_path(path);
4992         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
4993         if (ret)
4994                 err |= ret > 0 ? -ENOENT : ret;
4995         return err;
4996 }
4997
4998 /*
4999  * Check file extent datasum/hole, update the size of the file extents,
5000  * check and update the last offset of the file extent.
5001  *
5002  * @root:       the root of fs/file tree.
5003  * @fkey:       the key of the file extent.
5004  * @nodatasum:  INODE_NODATASUM feature.
5005  * @size:       the sum of all EXTENT_DATA items size for this inode.
5006  * @end:        the offset of the last extent.
5007  *
5008  * Return 0 if no error occurred.
5009  */
5010 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5011                              struct extent_buffer *node, int slot,
5012                              unsigned int nodatasum, u64 *size, u64 *end)
5013 {
5014         struct btrfs_file_extent_item *fi;
5015         u64 disk_bytenr;
5016         u64 disk_num_bytes;
5017         u64 extent_num_bytes;
5018         u64 extent_offset;
5019         u64 csum_found;         /* In byte size, sectorsize aligned */
5020         u64 search_start;       /* Logical range start we search for csum */
5021         u64 search_len;         /* Logical range len we search for csum */
5022         unsigned int extent_type;
5023         unsigned int is_hole;
5024         int compressed = 0;
5025         int ret;
5026         int err = 0;
5027
5028         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5029
5030         /* Check inline extent */
5031         extent_type = btrfs_file_extent_type(node, fi);
5032         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5033                 struct btrfs_item *e = btrfs_item_nr(slot);
5034                 u32 item_inline_len;
5035
5036                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5037                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5038                 compressed = btrfs_file_extent_compression(node, fi);
5039                 if (extent_num_bytes == 0) {
5040                         error(
5041                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5042                                 root->objectid, fkey->objectid, fkey->offset);
5043                         err |= FILE_EXTENT_ERROR;
5044                 }
5045                 if (!compressed && extent_num_bytes != item_inline_len) {
5046                         error(
5047                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5048                                 root->objectid, fkey->objectid, fkey->offset,
5049                                 extent_num_bytes, item_inline_len);
5050                         err |= FILE_EXTENT_ERROR;
5051                 }
5052                 *end += extent_num_bytes;
5053                 *size += extent_num_bytes;
5054                 return err;
5055         }
5056
5057         /* Check extent type */
5058         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5059                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5060                 err |= FILE_EXTENT_ERROR;
5061                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5062                       root->objectid, fkey->objectid, fkey->offset);
5063                 return err;
5064         }
5065
5066         /* Check REG_EXTENT/PREALLOC_EXTENT */
5067         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5068         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5069         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5070         extent_offset = btrfs_file_extent_offset(node, fi);
5071         compressed = btrfs_file_extent_compression(node, fi);
5072         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5073
5074         /*
5075          * Check EXTENT_DATA csum
5076          *
5077          * For plain (uncompressed) extent, we should only check the range
5078          * we're referring to, as it's possible that part of prealloc extent
5079          * has been written, and has csum:
5080          *
5081          * |<--- Original large preallocated extent A ---->|
5082          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5083          *      No csum                         Has csum
5084          *
5085          * For compressed extent, we should check the whole range.
5086          */
5087         if (!compressed) {
5088                 search_start = disk_bytenr + extent_offset;
5089                 search_len = extent_num_bytes;
5090         } else {
5091                 search_start = disk_bytenr;
5092                 search_len = disk_num_bytes;
5093         }
5094         ret = count_csum_range(root, search_start, search_len, &csum_found);
5095         if (csum_found > 0 && nodatasum) {
5096                 err |= ODD_CSUM_ITEM;
5097                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5098                       root->objectid, fkey->objectid, fkey->offset);
5099         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5100                    !is_hole && (ret < 0 || csum_found < search_len)) {
5101                 err |= CSUM_ITEM_MISSING;
5102                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5103                       root->objectid, fkey->objectid, fkey->offset,
5104                       csum_found, search_len);
5105         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5106                 err |= ODD_CSUM_ITEM;
5107                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5108                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5109         }
5110
5111         /* Check EXTENT_DATA hole */
5112         if (!no_holes && *end != fkey->offset) {
5113                 err |= FILE_EXTENT_ERROR;
5114                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5115                       root->objectid, fkey->objectid, fkey->offset);
5116         }
5117
5118         *end += extent_num_bytes;
5119         if (!is_hole)
5120                 *size += extent_num_bytes;
5121
5122         return err;
5123 }
5124
5125 /*
5126  * Set inode item nbytes to @nbytes
5127  *
5128  * Returns  0     on success
5129  * Returns  != 0  on error
5130  */
5131 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5132                                       struct btrfs_path *path,
5133                                       u64 ino, u64 nbytes)
5134 {
5135         struct btrfs_trans_handle *trans;
5136         struct btrfs_inode_item *ii;
5137         struct btrfs_key key;
5138         struct btrfs_key research_key;
5139         int err = 0;
5140         int ret;
5141
5142         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5143
5144         key.objectid = ino;
5145         key.type = BTRFS_INODE_ITEM_KEY;
5146         key.offset = 0;
5147
5148         trans = btrfs_start_transaction(root, 1);
5149         if (IS_ERR(trans)) {
5150                 ret = PTR_ERR(trans);
5151                 err |= ret;
5152                 goto out;
5153         }
5154
5155         btrfs_release_path(path);
5156         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5157         if (ret > 0)
5158                 ret = -ENOENT;
5159         if (ret) {
5160                 err |= ret;
5161                 goto fail;
5162         }
5163
5164         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5165                             struct btrfs_inode_item);
5166         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5167         btrfs_mark_buffer_dirty(path->nodes[0]);
5168 fail:
5169         btrfs_commit_transaction(trans, root);
5170 out:
5171         if (ret)
5172                 error("failed to set nbytes in inode %llu root %llu",
5173                       ino, root->root_key.objectid);
5174         else
5175                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5176                        root->root_key.objectid, nbytes);
5177
5178         /* research path */
5179         btrfs_release_path(path);
5180         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5181         err |= ret;
5182
5183         return err;
5184 }
5185
5186 /*
5187  * Set directory inode isize to @isize.
5188  *
5189  * Returns 0     on success.
5190  * Returns != 0  on error.
5191  */
5192 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5193                                    struct btrfs_path *path,
5194                                    u64 ino, u64 isize)
5195 {
5196         struct btrfs_trans_handle *trans;
5197         struct btrfs_inode_item *ii;
5198         struct btrfs_key key;
5199         struct btrfs_key research_key;
5200         int ret;
5201         int err = 0;
5202
5203         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5204
5205         key.objectid = ino;
5206         key.type = BTRFS_INODE_ITEM_KEY;
5207         key.offset = 0;
5208
5209         trans = btrfs_start_transaction(root, 1);
5210         if (IS_ERR(trans)) {
5211                 ret = PTR_ERR(trans);
5212                 err |= ret;
5213                 goto out;
5214         }
5215
5216         btrfs_release_path(path);
5217         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5218         if (ret > 0)
5219                 ret = -ENOENT;
5220         if (ret) {
5221                 err |= ret;
5222                 goto fail;
5223         }
5224
5225         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5226                             struct btrfs_inode_item);
5227         btrfs_set_inode_size(path->nodes[0], ii, isize);
5228         btrfs_mark_buffer_dirty(path->nodes[0]);
5229 fail:
5230         btrfs_commit_transaction(trans, root);
5231 out:
5232         if (ret)
5233                 error("failed to set isize in inode %llu root %llu",
5234                       ino, root->root_key.objectid);
5235         else
5236                 printf("Set isize in inode %llu root %llu to %llu\n",
5237                        ino, root->root_key.objectid, isize);
5238
5239         btrfs_release_path(path);
5240         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5241         err |= ret;
5242
5243         return err;
5244 }
5245
5246 /*
5247  * Wrapper function for btrfs_add_orphan_item().
5248  *
5249  * Returns 0     on success.
5250  * Returns != 0  on error.
5251  */
5252 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5253                                            struct btrfs_path *path, u64 ino)
5254 {
5255         struct btrfs_trans_handle *trans;
5256         struct btrfs_key research_key;
5257         int ret;
5258         int err = 0;
5259
5260         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5261
5262         trans = btrfs_start_transaction(root, 1);
5263         if (IS_ERR(trans)) {
5264                 ret = PTR_ERR(trans);
5265                 err |= ret;
5266                 goto out;
5267         }
5268
5269         btrfs_release_path(path);
5270         ret = btrfs_add_orphan_item(trans, root, path, ino);
5271         err |= ret;
5272         btrfs_commit_transaction(trans, root);
5273 out:
5274         if (ret)
5275                 error("failed to add inode %llu as orphan item root %llu",
5276                       ino, root->root_key.objectid);
5277         else
5278                 printf("Added inode %llu as orphan item root %llu\n",
5279                        ino, root->root_key.objectid);
5280
5281         btrfs_release_path(path);
5282         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5283         err |= ret;
5284
5285         return err;
5286 }
5287
5288 /*
5289  * Check INODE_ITEM and related ITEMs (the same inode number)
5290  * 1. check link count
5291  * 2. check inode ref/extref
5292  * 3. check dir item/index
5293  *
5294  * @ext_ref:    the EXTENDED_IREF feature
5295  *
5296  * Return 0 if no error occurred.
5297  * Return >0 (error bitmap) if an error occurred or the traversal hit the last item
5298  */
5299 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5300                             unsigned int ext_ref)
5301 {
5302         struct extent_buffer *node;
5303         struct btrfs_inode_item *ii;
5304         struct btrfs_key key;
5305         u64 inode_id;
5306         u32 mode;
5307         u64 nlink;
5308         u64 nbytes;
5309         u64 isize;
5310         u64 size = 0;
5311         u64 refs = 0;
5312         u64 extent_end = 0;
5313         u64 extent_size = 0;
5314         unsigned int dir;
5315         unsigned int nodatasum;
5316         int slot;
5317         int ret;
5318         int err = 0;
5319         char namebuf[BTRFS_NAME_LEN] = {0};
5320         u32 name_len = 0;
5321
5322         node = path->nodes[0];
5323         slot = path->slots[0];
5324
5325         btrfs_item_key_to_cpu(node, &key, slot);
5326         inode_id = key.objectid;
5327
5328         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5329                 ret = btrfs_next_item(root, path);
5330                 if (ret > 0)
5331                         err |= LAST_ITEM;
5332                 return err;
5333         }
5334
5335         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5336         isize = btrfs_inode_size(node, ii);
5337         nbytes = btrfs_inode_nbytes(node, ii);
5338         mode = btrfs_inode_mode(node, ii);
5339         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5340         nlink = btrfs_inode_nlink(node, ii);
5341         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5342
5343         while (1) {
5344                 ret = btrfs_next_item(root, path);
5345                 if (ret < 0) {
5346                         /* out will fill 'err' rusing current statistics */
5347                         goto out;
5348                 } else if (ret > 0) {
5349                         err |= LAST_ITEM;
5350                         goto out;
5351                 }
5352
5353                 node = path->nodes[0];
5354                 slot = path->slots[0];
5355                 btrfs_item_key_to_cpu(node, &key, slot);
5356                 if (key.objectid != inode_id)
5357                         goto out;
5358
5359                 switch (key.type) {
5360                 case BTRFS_INODE_REF_KEY:
5361                         ret = check_inode_ref(root, &key, path, namebuf,
5362                                               &name_len, &refs, mode);
5363                         err |= ret;
5364                         break;
5365                 case BTRFS_INODE_EXTREF_KEY:
5366                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5367                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5368                                         root->objectid, key.objectid,
5369                                         key.offset);
5370                         ret = check_inode_extref(root, &key, node, slot, &refs,
5371                                                  mode);
5372                         err |= ret;
5373                         break;
5374                 case BTRFS_DIR_ITEM_KEY:
5375                 case BTRFS_DIR_INDEX_KEY:
5376                         if (!dir) {
5377                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5378                                         root->objectid, inode_id,
5379                                         imode_to_type(mode), key.objectid,
5380                                         key.offset);
5381                         }
5382                         ret = check_dir_item(root, &key, path, &size, ext_ref);
5383                         err |= ret;
5384                         break;
5385                 case BTRFS_EXTENT_DATA_KEY:
5386                         if (dir) {
5387                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5388                                         root->objectid, inode_id, key.objectid,
5389                                         key.offset);
5390                         }
5391                         ret = check_file_extent(root, &key, node, slot,
5392                                                 nodatasum, &extent_size,
5393                                                 &extent_end);
5394                         err |= ret;
5395                         break;
5396                 case BTRFS_XATTR_ITEM_KEY:
5397                         break;
5398                 default:
5399                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5400                               key.objectid, key.type, key.offset);
5401                 }
5402         }
5403
5404 out:
5405         /* verify INODE_ITEM nlink/isize/nbytes */
5406         if (dir) {
5407                 if (nlink != 1) {
5408                         err |= LINK_COUNT_ERROR;
5409                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5410                               root->objectid, inode_id, nlink);
5411                 }
5412
5413                 /*
5414                  * Just a warning, as dir inode nbytes is just an
5415                  * instructive value.
5416                  */
5417                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5418                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5419                                 root->objectid, inode_id,
5420                                 root->fs_info->nodesize);
5421                 }
5422
5423                 if (isize != size) {
5424                         if (repair)
5425                                 ret = repair_dir_isize_lowmem(root, path,
5426                                                               inode_id, size);
5427                         if (!repair || ret) {
5428                                 err |= ISIZE_ERROR;
5429                                 error(
5430                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5431                                       root->objectid, inode_id, isize, size);
5432                         }
5433                 }
5434         } else {
5435                 if (nlink != refs) {
5436                         err |= LINK_COUNT_ERROR;
5437                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5438                               root->objectid, inode_id, nlink, refs);
5439                 } else if (!nlink) {
5440                         if (repair)
5441                                 ret = repair_inode_orphan_item_lowmem(root,
5442                                                               path, inode_id);
5443                         if (!repair || ret) {
5444                                 err |= ORPHAN_ITEM;
5445                                 error("root %llu INODE[%llu] is orphan item",
5446                                       root->objectid, inode_id);
5447                         }
5448                 }
5449
5450                 if (!nbytes && !no_holes && extent_end < isize) {
5451                         err |= NBYTES_ERROR;
5452                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5453                               root->objectid, inode_id, isize);
5454                 }
5455
5456                 if (nbytes != extent_size) {
5457                         if (repair)
5458                                 ret = repair_inode_nbytes_lowmem(root, path,
5459                                                          inode_id, extent_size);
5460                         if (!repair || ret) {
5461                                 err |= NBYTES_ERROR;
5462                                 error(
5463         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5464                                       root->objectid, inode_id, nbytes,
5465                                       extent_size);
5466                         }
5467                 }
5468         }
5469
5470         return err;
5471 }
5472
5473 /*
5474  * check first root dir's inode_item and inode_ref
5475  *
5476  * returns 0 means no error
5477  * returns >0 means error
5478  * returns <0 means fatal error
5479  */
5480 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5481 {
5482         struct btrfs_path path;
5483         struct btrfs_key key;
5484         struct btrfs_inode_item *ii;
5485         u64 index;
5486         u32 mode;
5487         int err = 0;
5488         int ret;
5489
5490         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5491         key.type = BTRFS_INODE_ITEM_KEY;
5492         key.offset = 0;
5493
5494         /* For root being dropped, we don't need to check first inode */
5495         if (btrfs_root_refs(&root->root_item) == 0 &&
5496             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5497             BTRFS_FIRST_FREE_OBJECTID)
5498                 return 0;
5499
5500         btrfs_init_path(&path);
5501         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5502         if (ret < 0)
5503                 goto out;
5504         if (ret > 0) {
5505                 ret = 0;
5506                 err |= INODE_ITEM_MISSING;
5507         } else {
5508                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5509                                     struct btrfs_inode_item);
5510                 mode = btrfs_inode_mode(path.nodes[0], ii);
5511                 if (imode_to_type(mode) != BTRFS_FT_DIR)
5512                         err |= INODE_ITEM_MISMATCH;
5513         }
5514
5515         /* lookup first inode ref */
5516         key.offset = BTRFS_FIRST_FREE_OBJECTID;
5517         key.type = BTRFS_INODE_REF_KEY;
5518         /* special index value */
5519         index = 0;
5520
5521         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5522         if (ret < 0)
5523                 goto out;
5524         err |= ret;
5525
5526 out:
5527         btrfs_release_path(&path);
5528         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5529                 error("root dir INODE_ITEM is %s",
5530                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5531         if (err & INODE_REF_MISSING)
5532                 error("root dir INODE_REF is missing");
5533
5534         return ret < 0 ? ret : err;
5535 }
5536
5537 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5538                                                 u64 parent, u64 root)
5539 {
5540         struct rb_node *node;
5541         struct tree_backref *back = NULL;
5542         struct tree_backref match = {
5543                 .node = {
5544                         .is_data = 0,
5545                 },
5546         };
5547
5548         if (parent) {
5549                 match.parent = parent;
5550                 match.node.full_backref = 1;
5551         } else {
5552                 match.root = root;
5553         }
5554
5555         node = rb_search(&rec->backref_tree, &match.node.node,
5556                          (rb_compare_keys)compare_extent_backref, NULL);
5557         if (node)
5558                 back = to_tree_backref(rb_node_to_extent_backref(node));
5559
5560         return back;
5561 }
5562
5563 static struct data_backref *find_data_backref(struct extent_record *rec,
5564                                                 u64 parent, u64 root,
5565                                                 u64 owner, u64 offset,
5566                                                 int found_ref,
5567                                                 u64 disk_bytenr, u64 bytes)
5568 {
5569         struct rb_node *node;
5570         struct data_backref *back = NULL;
5571         struct data_backref match = {
5572                 .node = {
5573                         .is_data = 1,
5574                 },
5575                 .owner = owner,
5576                 .offset = offset,
5577                 .bytes = bytes,
5578                 .found_ref = found_ref,
5579                 .disk_bytenr = disk_bytenr,
5580         };
5581
5582         if (parent) {
5583                 match.parent = parent;
5584                 match.node.full_backref = 1;
5585         } else {
5586                 match.root = root;
5587         }
5588
5589         node = rb_search(&rec->backref_tree, &match.node.node,
5590                          (rb_compare_keys)compare_extent_backref, NULL);
5591         if (node)
5592                 back = to_data_backref(rb_node_to_extent_backref(node));
5593
5594         return back;
5595 }
5596 /*
5597  * Iterate all item on the tree and call check_inode_item() to check.
5598  *
5599  * @root:       the root of the tree to be checked.
5600  * @ext_ref:    the EXTENDED_IREF feature
5601  *
5602  * Return 0 if no error found.
5603  * Return <0 for error.
5604  */
5605 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5606 {
5607         struct btrfs_path path;
5608         struct node_refs nrefs;
5609         struct btrfs_root_item *root_item = &root->root_item;
5610         int ret;
5611         int level;
5612         int err = 0;
5613
5614         /*
5615          * We need to manually check the first inode item(256)
5616          * As the following traversal function will only start from
5617          * the first inode item in the leaf, if inode item(256) is missing
5618          * we will just skip it forever.
5619          */
5620         ret = check_fs_first_inode(root, ext_ref);
5621         if (ret < 0)
5622                 return ret;
5623         err |= !!ret;
5624
5625         memset(&nrefs, 0, sizeof(nrefs));
5626         level = btrfs_header_level(root->node);
5627         btrfs_init_path(&path);
5628
5629         if (btrfs_root_refs(root_item) > 0 ||
5630             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5631                 path.nodes[level] = root->node;
5632                 path.slots[level] = 0;
5633                 extent_buffer_get(root->node);
5634         } else {
5635                 struct btrfs_key key;
5636
5637                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5638                 level = root_item->drop_level;
5639                 path.lowest_level = level;
5640                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5641                 if (ret < 0)
5642                         goto out;
5643                 ret = 0;
5644         }
5645
5646         while (1) {
5647                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5648                 err |= !!ret;
5649
5650                 /* if ret is negative, walk shall stop */
5651                 if (ret < 0) {
5652                         ret = err;
5653                         break;
5654                 }
5655
5656                 ret = walk_up_tree_v2(root, &path, &level);
5657                 if (ret != 0) {
5658                         /* Normal exit, reset ret to err */
5659                         ret = err;
5660                         break;
5661                 }
5662         }
5663
5664 out:
5665         btrfs_release_path(&path);
5666         return ret;
5667 }
5668
5669 /*
5670  * Find the relative ref for root_ref and root_backref.
5671  *
5672  * @root:       the root of the root tree.
5673  * @ref_key:    the key of the root ref.
5674  *
5675  * Return 0 if no error occurred.
5676  */
5677 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5678                           struct extent_buffer *node, int slot)
5679 {
5680         struct btrfs_path path;
5681         struct btrfs_key key;
5682         struct btrfs_root_ref *ref;
5683         struct btrfs_root_ref *backref;
5684         char ref_name[BTRFS_NAME_LEN] = {0};
5685         char backref_name[BTRFS_NAME_LEN] = {0};
5686         u64 ref_dirid;
5687         u64 ref_seq;
5688         u32 ref_namelen;
5689         u64 backref_dirid;
5690         u64 backref_seq;
5691         u32 backref_namelen;
5692         u32 len;
5693         int ret;
5694         int err = 0;
5695
5696         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5697         ref_dirid = btrfs_root_ref_dirid(node, ref);
5698         ref_seq = btrfs_root_ref_sequence(node, ref);
5699         ref_namelen = btrfs_root_ref_name_len(node, ref);
5700
5701         if (ref_namelen <= BTRFS_NAME_LEN) {
5702                 len = ref_namelen;
5703         } else {
5704                 len = BTRFS_NAME_LEN;
5705                 warning("%s[%llu %llu] ref_name too long",
5706                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5707                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5708                         ref_key->offset);
5709         }
5710         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5711
5712         /* Find relative root_ref */
5713         key.objectid = ref_key->offset;
5714         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5715         key.offset = ref_key->objectid;
5716
5717         btrfs_init_path(&path);
5718         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5719         if (ret) {
5720                 err |= ROOT_REF_MISSING;
5721                 error("%s[%llu %llu] couldn't find relative ref",
5722                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5723                       "ROOT_REF" : "ROOT_BACKREF",
5724                       ref_key->objectid, ref_key->offset);
5725                 goto out;
5726         }
5727
5728         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5729                                  struct btrfs_root_ref);
5730         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5731         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5732         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5733
5734         if (backref_namelen <= BTRFS_NAME_LEN) {
5735                 len = backref_namelen;
5736         } else {
5737                 len = BTRFS_NAME_LEN;
5738                 warning("%s[%llu %llu] ref_name too long",
5739                         key.type == BTRFS_ROOT_REF_KEY ?
5740                         "ROOT_REF" : "ROOT_BACKREF",
5741                         key.objectid, key.offset);
5742         }
5743         read_extent_buffer(path.nodes[0], backref_name,
5744                            (unsigned long)(backref + 1), len);
5745
5746         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5747             ref_namelen != backref_namelen ||
5748             strncmp(ref_name, backref_name, len)) {
5749                 err |= ROOT_REF_MISMATCH;
5750                 error("%s[%llu %llu] mismatch relative ref",
5751                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5752                       "ROOT_REF" : "ROOT_BACKREF",
5753                       ref_key->objectid, ref_key->offset);
5754         }
5755 out:
5756         btrfs_release_path(&path);
5757         return err;
5758 }
5759
5760 /*
5761  * Check all fs/file tree in low_memory mode.
5762  *
5763  * 1. for fs tree root item, call check_fs_root_v2()
5764  * 2. for fs tree root ref/backref, call check_root_ref()
5765  *
5766  * Return 0 if no error occurred.
5767  */
5768 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5769 {
5770         struct btrfs_root *tree_root = fs_info->tree_root;
5771         struct btrfs_root *cur_root = NULL;
5772         struct btrfs_path path;
5773         struct btrfs_key key;
5774         struct extent_buffer *node;
5775         unsigned int ext_ref;
5776         int slot;
5777         int ret;
5778         int err = 0;
5779
5780         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5781
5782         btrfs_init_path(&path);
5783         key.objectid = BTRFS_FS_TREE_OBJECTID;
5784         key.offset = 0;
5785         key.type = BTRFS_ROOT_ITEM_KEY;
5786
5787         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5788         if (ret < 0) {
5789                 err = ret;
5790                 goto out;
5791         } else if (ret > 0) {
5792                 err = -ENOENT;
5793                 goto out;
5794         }
5795
5796         while (1) {
5797                 node = path.nodes[0];
5798                 slot = path.slots[0];
5799                 btrfs_item_key_to_cpu(node, &key, slot);
5800                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5801                         goto out;
5802                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5803                     fs_root_objectid(key.objectid)) {
5804                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5805                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5806                                                                        &key);
5807                         } else {
5808                                 key.offset = (u64)-1;
5809                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5810                         }
5811
5812                         if (IS_ERR(cur_root)) {
5813                                 error("Fail to read fs/subvol tree: %lld",
5814                                       key.objectid);
5815                                 err = -EIO;
5816                                 goto next;
5817                         }
5818
5819                         ret = check_fs_root_v2(cur_root, ext_ref);
5820                         err |= ret;
5821
5822                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5823                                 btrfs_free_fs_root(cur_root);
5824                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5825                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5826                         ret = check_root_ref(tree_root, &key, node, slot);
5827                         err |= ret;
5828                 }
5829 next:
5830                 ret = btrfs_next_item(tree_root, &path);
5831                 if (ret > 0)
5832                         goto out;
5833                 if (ret < 0) {
5834                         err = ret;
5835                         goto out;
5836                 }
5837         }
5838
5839 out:
5840         btrfs_release_path(&path);
5841         return err;
5842 }
5843
5844 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5845                           struct cache_tree *root_cache)
5846 {
5847         int ret;
5848
5849         if (!ctx.progress_enabled)
5850                 fprintf(stderr, "checking fs roots\n");
5851         if (check_mode == CHECK_MODE_LOWMEM)
5852                 ret = check_fs_roots_v2(fs_info);
5853         else
5854                 ret = check_fs_roots(fs_info, root_cache);
5855
5856         return ret;
5857 }
5858
5859 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5860 {
5861         struct extent_backref *back, *tmp;
5862         struct tree_backref *tback;
5863         struct data_backref *dback;
5864         u64 found = 0;
5865         int err = 0;
5866
5867         rbtree_postorder_for_each_entry_safe(back, tmp,
5868                                              &rec->backref_tree, node) {
5869                 if (!back->found_extent_tree) {
5870                         err = 1;
5871                         if (!print_errs)
5872                                 goto out;
5873                         if (back->is_data) {
5874                                 dback = to_data_backref(back);
5875                                 fprintf(stderr, "Data backref %llu %s %llu"
5876                                         " owner %llu offset %llu num_refs %lu"
5877                                         " not found in extent tree\n",
5878                                         (unsigned long long)rec->start,
5879                                         back->full_backref ?
5880                                         "parent" : "root",
5881                                         back->full_backref ?
5882                                         (unsigned long long)dback->parent:
5883                                         (unsigned long long)dback->root,
5884                                         (unsigned long long)dback->owner,
5885                                         (unsigned long long)dback->offset,
5886                                         (unsigned long)dback->num_refs);
5887                         } else {
5888                                 tback = to_tree_backref(back);
5889                                 fprintf(stderr, "Tree backref %llu parent %llu"
5890                                         " root %llu not found in extent tree\n",
5891                                         (unsigned long long)rec->start,
5892                                         (unsigned long long)tback->parent,
5893                                         (unsigned long long)tback->root);
5894                         }
5895                 }
5896                 if (!back->is_data && !back->found_ref) {
5897                         err = 1;
5898                         if (!print_errs)
5899                                 goto out;
5900                         tback = to_tree_backref(back);
5901                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5902                                 (unsigned long long)rec->start,
5903                                 back->full_backref ? "parent" : "root",
5904                                 back->full_backref ?
5905                                 (unsigned long long)tback->parent :
5906                                 (unsigned long long)tback->root, back);
5907                 }
5908                 if (back->is_data) {
5909                         dback = to_data_backref(back);
5910                         if (dback->found_ref != dback->num_refs) {
5911                                 err = 1;
5912                                 if (!print_errs)
5913                                         goto out;
5914                                 fprintf(stderr, "Incorrect local backref count"
5915                                         " on %llu %s %llu owner %llu"
5916                                         " offset %llu found %u wanted %u back %p\n",
5917                                         (unsigned long long)rec->start,
5918                                         back->full_backref ?
5919                                         "parent" : "root",
5920                                         back->full_backref ?
5921                                         (unsigned long long)dback->parent:
5922                                         (unsigned long long)dback->root,
5923                                         (unsigned long long)dback->owner,
5924                                         (unsigned long long)dback->offset,
5925                                         dback->found_ref, dback->num_refs, back);
5926                         }
5927                         if (dback->disk_bytenr != rec->start) {
5928                                 err = 1;
5929                                 if (!print_errs)
5930                                         goto out;
5931                                 fprintf(stderr, "Backref disk bytenr does not"
5932                                         " match extent record, bytenr=%llu, "
5933                                         "ref bytenr=%llu\n",
5934                                         (unsigned long long)rec->start,
5935                                         (unsigned long long)dback->disk_bytenr);
5936                         }
5937
5938                         if (dback->bytes != rec->nr) {
5939                                 err = 1;
5940                                 if (!print_errs)
5941                                         goto out;
5942                                 fprintf(stderr, "Backref bytes do not match "
5943                                         "extent backref, bytenr=%llu, ref "
5944                                         "bytes=%llu, backref bytes=%llu\n",
5945                                         (unsigned long long)rec->start,
5946                                         (unsigned long long)rec->nr,
5947                                         (unsigned long long)dback->bytes);
5948                         }
5949                 }
5950                 if (!back->is_data) {
5951                         found += 1;
5952                 } else {
5953                         dback = to_data_backref(back);
5954                         found += dback->found_ref;
5955                 }
5956         }
5957         if (found != rec->refs) {
5958                 err = 1;
5959                 if (!print_errs)
5960                         goto out;
5961                 fprintf(stderr, "Incorrect global backref count "
5962                         "on %llu found %llu wanted %llu\n",
5963                         (unsigned long long)rec->start,
5964                         (unsigned long long)found,
5965                         (unsigned long long)rec->refs);
5966         }
5967 out:
5968         return err;
5969 }
5970
/*
 * rb_free_nodes() callback: free the extent_backref that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
5977
5978 static void free_all_extent_backrefs(struct extent_record *rec)
5979 {
5980         rb_free_nodes(&rec->backref_tree, __free_one_backref);
5981 }
5982
5983 static void free_extent_record_cache(struct cache_tree *extent_cache)
5984 {
5985         struct cache_extent *cache;
5986         struct extent_record *rec;
5987
5988         while (1) {
5989                 cache = first_cache_extent(extent_cache);
5990                 if (!cache)
5991                         break;
5992                 rec = container_of(cache, struct extent_record, cache);
5993                 remove_cache_extent(extent_cache, cache);
5994                 free_all_extent_backrefs(rec);
5995                 free(rec);
5996         }
5997 }
5998
5999 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6000                                  struct extent_record *rec)
6001 {
6002         if (rec->content_checked && rec->owner_ref_checked &&
6003             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6004             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6005             !rec->bad_full_backref && !rec->crossing_stripes &&
6006             !rec->wrong_chunk_type) {
6007                 remove_cache_extent(extent_cache, &rec->cache);
6008                 free_all_extent_backrefs(rec);
6009                 list_del_init(&rec->list);
6010                 free(rec);
6011         }
6012         return 0;
6013 }
6014
6015 static int check_owner_ref(struct btrfs_root *root,
6016                             struct extent_record *rec,
6017                             struct extent_buffer *buf)
6018 {
6019         struct extent_backref *node, *tmp;
6020         struct tree_backref *back;
6021         struct btrfs_root *ref_root;
6022         struct btrfs_key key;
6023         struct btrfs_path path;
6024         struct extent_buffer *parent;
6025         int level;
6026         int found = 0;
6027         int ret;
6028
6029         rbtree_postorder_for_each_entry_safe(node, tmp,
6030                                              &rec->backref_tree, node) {
6031                 if (node->is_data)
6032                         continue;
6033                 if (!node->found_ref)
6034                         continue;
6035                 if (node->full_backref)
6036                         continue;
6037                 back = to_tree_backref(node);
6038                 if (btrfs_header_owner(buf) == back->root)
6039                         return 0;
6040         }
6041         BUG_ON(rec->is_root);
6042
6043         /* try to find the block by search corresponding fs tree */
6044         key.objectid = btrfs_header_owner(buf);
6045         key.type = BTRFS_ROOT_ITEM_KEY;
6046         key.offset = (u64)-1;
6047
6048         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6049         if (IS_ERR(ref_root))
6050                 return 1;
6051
6052         level = btrfs_header_level(buf);
6053         if (level == 0)
6054                 btrfs_item_key_to_cpu(buf, &key, 0);
6055         else
6056                 btrfs_node_key_to_cpu(buf, &key, 0);
6057
6058         btrfs_init_path(&path);
6059         path.lowest_level = level + 1;
6060         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6061         if (ret < 0)
6062                 return 0;
6063
6064         parent = path.nodes[level + 1];
6065         if (parent && buf->start == btrfs_node_blockptr(parent,
6066                                                         path.slots[level + 1]))
6067                 found = 1;
6068
6069         btrfs_release_path(&path);
6070         return found ? 0 : 1;
6071 }
6072
6073 static int is_extent_tree_record(struct extent_record *rec)
6074 {
6075         struct extent_backref *node, *tmp;
6076         struct tree_backref *back;
6077         int is_extent = 0;
6078
6079         rbtree_postorder_for_each_entry_safe(node, tmp,
6080                                              &rec->backref_tree, node) {
6081                 if (node->is_data)
6082                         return 0;
6083                 back = to_tree_backref(node);
6084                 if (node->full_backref)
6085                         return 0;
6086                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6087                         is_extent = 1;
6088         }
6089         return is_extent;
6090 }
6091
6092
6093 static int record_bad_block_io(struct btrfs_fs_info *info,
6094                                struct cache_tree *extent_cache,
6095                                u64 start, u64 len)
6096 {
6097         struct extent_record *rec;
6098         struct cache_extent *cache;
6099         struct btrfs_key key;
6100
6101         cache = lookup_cache_extent(extent_cache, start, len);
6102         if (!cache)
6103                 return 0;
6104
6105         rec = container_of(cache, struct extent_record, cache);
6106         if (!is_extent_tree_record(rec))
6107                 return 0;
6108
6109         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6110         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6111 }
6112
6113 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6114                        struct extent_buffer *buf, int slot)
6115 {
6116         if (btrfs_header_level(buf)) {
6117                 struct btrfs_key_ptr ptr1, ptr2;
6118
6119                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6120                                    sizeof(struct btrfs_key_ptr));
6121                 read_extent_buffer(buf, &ptr2,
6122                                    btrfs_node_key_ptr_offset(slot + 1),
6123                                    sizeof(struct btrfs_key_ptr));
6124                 write_extent_buffer(buf, &ptr1,
6125                                     btrfs_node_key_ptr_offset(slot + 1),
6126                                     sizeof(struct btrfs_key_ptr));
6127                 write_extent_buffer(buf, &ptr2,
6128                                     btrfs_node_key_ptr_offset(slot),
6129                                     sizeof(struct btrfs_key_ptr));
6130                 if (slot == 0) {
6131                         struct btrfs_disk_key key;
6132                         btrfs_node_key(buf, &key, 0);
6133                         btrfs_fixup_low_keys(root, path, &key,
6134                                              btrfs_header_level(buf) + 1);
6135                 }
6136         } else {
6137                 struct btrfs_item *item1, *item2;
6138                 struct btrfs_key k1, k2;
6139                 char *item1_data, *item2_data;
6140                 u32 item1_offset, item2_offset, item1_size, item2_size;
6141
6142                 item1 = btrfs_item_nr(slot);
6143                 item2 = btrfs_item_nr(slot + 1);
6144                 btrfs_item_key_to_cpu(buf, &k1, slot);
6145                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6146                 item1_offset = btrfs_item_offset(buf, item1);
6147                 item2_offset = btrfs_item_offset(buf, item2);
6148                 item1_size = btrfs_item_size(buf, item1);
6149                 item2_size = btrfs_item_size(buf, item2);
6150
6151                 item1_data = malloc(item1_size);
6152                 if (!item1_data)
6153                         return -ENOMEM;
6154                 item2_data = malloc(item2_size);
6155                 if (!item2_data) {
6156                         free(item1_data);
6157                         return -ENOMEM;
6158                 }
6159
6160                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6161                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6162
6163                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6164                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6165                 free(item1_data);
6166                 free(item2_data);
6167
6168                 btrfs_set_item_offset(buf, item1, item2_offset);
6169                 btrfs_set_item_offset(buf, item2, item1_offset);
6170                 btrfs_set_item_size(buf, item1, item2_size);
6171                 btrfs_set_item_size(buf, item2, item1_size);
6172
6173                 path->slots[0] = slot;
6174                 btrfs_set_item_key_unsafe(root, path, &k2);
6175                 path->slots[0] = slot + 1;
6176                 btrfs_set_item_key_unsafe(root, path, &k1);
6177         }
6178         return 0;
6179 }
6180
6181 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6182 {
6183         struct extent_buffer *buf;
6184         struct btrfs_key k1, k2;
6185         int i;
6186         int level = path->lowest_level;
6187         int ret = -EIO;
6188
6189         buf = path->nodes[level];
6190         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6191                 if (level) {
6192                         btrfs_node_key_to_cpu(buf, &k1, i);
6193                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6194                 } else {
6195                         btrfs_item_key_to_cpu(buf, &k1, i);
6196                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6197                 }
6198                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6199                         continue;
6200                 ret = swap_values(root, path, buf, i);
6201                 if (ret)
6202                         break;
6203                 btrfs_mark_buffer_dirty(buf);
6204                 i = 0;
6205         }
6206         return ret;
6207 }
6208
6209 static int delete_bogus_item(struct btrfs_root *root,
6210                              struct btrfs_path *path,
6211                              struct extent_buffer *buf, int slot)
6212 {
6213         struct btrfs_key key;
6214         int nritems = btrfs_header_nritems(buf);
6215
6216         btrfs_item_key_to_cpu(buf, &key, slot);
6217
6218         /* These are all the keys we can deal with missing. */
6219         if (key.type != BTRFS_DIR_INDEX_KEY &&
6220             key.type != BTRFS_EXTENT_ITEM_KEY &&
6221             key.type != BTRFS_METADATA_ITEM_KEY &&
6222             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6223             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6224                 return -1;
6225
6226         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6227                (unsigned long long)key.objectid, key.type,
6228                (unsigned long long)key.offset, slot, buf->start);
6229         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6230                               btrfs_item_nr_offset(slot + 1),
6231                               sizeof(struct btrfs_item) *
6232                               (nritems - slot - 1));
6233         btrfs_set_header_nritems(buf, nritems - 1);
6234         if (slot == 0) {
6235                 struct btrfs_disk_key disk_key;
6236
6237                 btrfs_item_key(buf, &disk_key, 0);
6238                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6239         }
6240         btrfs_mark_buffer_dirty(buf);
6241         return 0;
6242 }
6243
6244 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6245 {
6246         struct extent_buffer *buf;
6247         int i;
6248         int ret = 0;
6249
6250         /* We should only get this for leaves */
6251         BUG_ON(path->lowest_level);
6252         buf = path->nodes[0];
6253 again:
6254         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6255                 unsigned int shift = 0, offset;
6256
6257                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6258                     BTRFS_LEAF_DATA_SIZE(root)) {
6259                         if (btrfs_item_end_nr(buf, i) >
6260                             BTRFS_LEAF_DATA_SIZE(root)) {
6261                                 ret = delete_bogus_item(root, path, buf, i);
6262                                 if (!ret)
6263                                         goto again;
6264                                 fprintf(stderr, "item is off the end of the "
6265                                         "leaf, can't fix\n");
6266                                 ret = -EIO;
6267                                 break;
6268                         }
6269                         shift = BTRFS_LEAF_DATA_SIZE(root) -
6270                                 btrfs_item_end_nr(buf, i);
6271                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6272                            btrfs_item_offset_nr(buf, i - 1)) {
6273                         if (btrfs_item_end_nr(buf, i) >
6274                             btrfs_item_offset_nr(buf, i - 1)) {
6275                                 ret = delete_bogus_item(root, path, buf, i);
6276                                 if (!ret)
6277                                         goto again;
6278                                 fprintf(stderr, "items overlap, can't fix\n");
6279                                 ret = -EIO;
6280                                 break;
6281                         }
6282                         shift = btrfs_item_offset_nr(buf, i - 1) -
6283                                 btrfs_item_end_nr(buf, i);
6284                 }
6285                 if (!shift)
6286                         continue;
6287
6288                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6289                        i, shift, (unsigned long long)buf->start);
6290                 offset = btrfs_item_offset_nr(buf, i);
6291                 memmove_extent_buffer(buf,
6292                                       btrfs_leaf_data(buf) + offset + shift,
6293                                       btrfs_leaf_data(buf) + offset,
6294                                       btrfs_item_size_nr(buf, i));
6295                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6296                                       offset + shift);
6297                 btrfs_mark_buffer_dirty(buf);
6298         }
6299
6300         /*
6301          * We may have moved things, in which case we want to exit so we don't
6302          * write those changes out.  Once we have proper abort functionality in
6303          * progs this can be changed to something nicer.
6304          */
6305         BUG_ON(ret);
6306         return ret;
6307 }
6308
6309 /*
6310  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6311  * then just return -EIO.
6312  */
6313 static int try_to_fix_bad_block(struct btrfs_root *root,
6314                                 struct extent_buffer *buf,
6315                                 enum btrfs_tree_block_status status)
6316 {
6317         struct btrfs_trans_handle *trans;
6318         struct ulist *roots;
6319         struct ulist_node *node;
6320         struct btrfs_root *search_root;
6321         struct btrfs_path path;
6322         struct ulist_iterator iter;
6323         struct btrfs_key root_key, key;
6324         int ret;
6325
6326         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6327             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6328                 return -EIO;
6329
6330         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6331         if (ret)
6332                 return -EIO;
6333
6334         btrfs_init_path(&path);
6335         ULIST_ITER_INIT(&iter);
6336         while ((node = ulist_next(roots, &iter))) {
6337                 root_key.objectid = node->val;
6338                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6339                 root_key.offset = (u64)-1;
6340
6341                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6342                 if (IS_ERR(root)) {
6343                         ret = -EIO;
6344                         break;
6345                 }
6346
6347
6348                 trans = btrfs_start_transaction(search_root, 0);
6349                 if (IS_ERR(trans)) {
6350                         ret = PTR_ERR(trans);
6351                         break;
6352                 }
6353
6354                 path.lowest_level = btrfs_header_level(buf);
6355                 path.skip_check_block = 1;
6356                 if (path.lowest_level)
6357                         btrfs_node_key_to_cpu(buf, &key, 0);
6358                 else
6359                         btrfs_item_key_to_cpu(buf, &key, 0);
6360                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6361                 if (ret) {
6362                         ret = -EIO;
6363                         btrfs_commit_transaction(trans, search_root);
6364                         break;
6365                 }
6366                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6367                         ret = fix_key_order(search_root, &path);
6368                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6369                         ret = fix_item_offset(search_root, &path);
6370                 if (ret) {
6371                         btrfs_commit_transaction(trans, search_root);
6372                         break;
6373                 }
6374                 btrfs_release_path(&path);
6375                 btrfs_commit_transaction(trans, search_root);
6376         }
6377         ulist_free(roots);
6378         btrfs_release_path(&path);
6379         return ret;
6380 }
6381
6382 static int check_block(struct btrfs_root *root,
6383                        struct cache_tree *extent_cache,
6384                        struct extent_buffer *buf, u64 flags)
6385 {
6386         struct extent_record *rec;
6387         struct cache_extent *cache;
6388         struct btrfs_key key;
6389         enum btrfs_tree_block_status status;
6390         int ret = 0;
6391         int level;
6392
6393         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6394         if (!cache)
6395                 return 1;
6396         rec = container_of(cache, struct extent_record, cache);
6397         rec->generation = btrfs_header_generation(buf);
6398
6399         level = btrfs_header_level(buf);
6400         if (btrfs_header_nritems(buf) > 0) {
6401
6402                 if (level == 0)
6403                         btrfs_item_key_to_cpu(buf, &key, 0);
6404                 else
6405                         btrfs_node_key_to_cpu(buf, &key, 0);
6406
6407                 rec->info_objectid = key.objectid;
6408         }
6409         rec->info_level = level;
6410
6411         if (btrfs_is_leaf(buf))
6412                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6413         else
6414                 status = btrfs_check_node(root, &rec->parent_key, buf);
6415
6416         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6417                 if (repair)
6418                         status = try_to_fix_bad_block(root, buf, status);
6419                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6420                         ret = -EIO;
6421                         fprintf(stderr, "bad block %llu\n",
6422                                 (unsigned long long)buf->start);
6423                 } else {
6424                         /*
6425                          * Signal to callers we need to start the scan over
6426                          * again since we'll have cowed blocks.
6427                          */
6428                         ret = -EAGAIN;
6429                 }
6430         } else {
6431                 rec->content_checked = 1;
6432                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6433                         rec->owner_ref_checked = 1;
6434                 else {
6435                         ret = check_owner_ref(root, rec, buf);
6436                         if (!ret)
6437                                 rec->owner_ref_checked = 1;
6438                 }
6439         }
6440         if (!ret)
6441                 maybe_free_extent_rec(extent_cache, rec);
6442         return ret;
6443 }
6444
#if 0
/*
 * Compiled out: lookup of a tree backref by walking the rec->backrefs list.
 * With @parent set only full backrefs are matched, by parent; otherwise only
 * non-full backrefs are matched, by root.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *pos;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                struct extent_backref *node = to_extent_backref(pos);
                struct tree_backref *back;

                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                if (parent > 0) {
                        if (node->full_backref && parent == back->parent)
                                return back;
                } else if (!node->full_backref && back->root == root) {
                        return back;
                }
        }
        return NULL;
}
#endif
6474
6475 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6476                                                 u64 parent, u64 root)
6477 {
6478         struct tree_backref *ref = malloc(sizeof(*ref));
6479
6480         if (!ref)
6481                 return NULL;
6482         memset(&ref->node, 0, sizeof(ref->node));
6483         if (parent > 0) {
6484                 ref->parent = parent;
6485                 ref->node.full_backref = 1;
6486         } else {
6487                 ref->root = root;
6488                 ref->node.full_backref = 0;
6489         }
6490
6491         return ref;
6492 }
6493
#if 0
/*
 * Compiled out: lookup of a data backref by walking the rec->backrefs list.
 * With @parent set only full backrefs are matched, by parent.  Otherwise
 * non-full backrefs are matched by root, owner and offset; when both the
 * caller and the candidate have found_ref set, bytes and disk_bytenr must
 * agree as well.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *pos;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                struct extent_backref *node = to_extent_backref(pos);
                struct data_backref *back;

                if (!node->is_data)
                        continue;
                back = to_data_backref(node);
                if (parent > 0) {
                        if (node->full_backref && parent == back->parent)
                                return back;
                        continue;
                }
                if (node->full_backref)
                        continue;
                if (back->root != root || back->owner != owner ||
                    back->offset != offset)
                        continue;
                if (found_ref && node->found_ref &&
                    (back->bytes != bytes ||
                     back->disk_bytenr != disk_bytenr))
                        continue;
                return back;
        }
        return NULL;
}
#endif
6532
6533 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6534                                                 u64 parent, u64 root,
6535                                                 u64 owner, u64 offset,
6536                                                 u64 max_size)
6537 {
6538         struct data_backref *ref = malloc(sizeof(*ref));
6539
6540         if (!ref)
6541                 return NULL;
6542         memset(&ref->node, 0, sizeof(ref->node));
6543         ref->node.is_data = 1;
6544
6545         if (parent > 0) {
6546                 ref->parent = parent;
6547                 ref->owner = 0;
6548                 ref->offset = 0;
6549                 ref->node.full_backref = 1;
6550         } else {
6551                 ref->root = root;
6552                 ref->owner = owner;
6553                 ref->offset = offset;
6554                 ref->node.full_backref = 0;
6555         }
6556         ref->bytes = max_size;
6557         ref->found_ref = 0;
6558         ref->num_refs = 0;
6559         if (max_size > rec->max_size)
6560                 rec->max_size = max_size;
6561         return ref;
6562 }
6563
6564 /* Check if the type of extent matches with its chunk */
6565 static void check_extent_type(struct extent_record *rec)
6566 {
6567         struct btrfs_block_group_cache *bg_cache;
6568
6569         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6570         if (!bg_cache)
6571                 return;
6572
6573         /* data extent, check chunk directly*/
6574         if (!rec->metadata) {
6575                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6576                         rec->wrong_chunk_type = 1;
6577                 return;
6578         }
6579
6580         /* metadata extent, check the obvious case first */
6581         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6582                                  BTRFS_BLOCK_GROUP_METADATA))) {
6583                 rec->wrong_chunk_type = 1;
6584                 return;
6585         }
6586
6587         /*
6588          * Check SYSTEM extent, as it's also marked as metadata, we can only
6589          * make sure it's a SYSTEM extent by its backref
6590          */
6591         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6592                 struct extent_backref *node;
6593                 struct tree_backref *tback;
6594                 u64 bg_type;
6595
6596                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6597                 if (node->is_data) {
6598                         /* tree block shouldn't have data backref */
6599                         rec->wrong_chunk_type = 1;
6600                         return;
6601                 }
6602                 tback = container_of(node, struct tree_backref, node);
6603
6604                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6605                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6606                 else
6607                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6608                 if (!(bg_cache->flags & bg_type))
6609                         rec->wrong_chunk_type = 1;
6610         }
6611 }
6612
6613 /*
6614  * Allocate a new extent record, fill default values from @tmpl and insert int
6615  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6616  * the cache, otherwise it fails.
6617  */
6618 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6619                 struct extent_record *tmpl)
6620 {
6621         struct extent_record *rec;
6622         int ret = 0;
6623
6624         BUG_ON(tmpl->max_size == 0);
6625         rec = malloc(sizeof(*rec));
6626         if (!rec)
6627                 return -ENOMEM;
6628         rec->start = tmpl->start;
6629         rec->max_size = tmpl->max_size;
6630         rec->nr = max(tmpl->nr, tmpl->max_size);
6631         rec->found_rec = tmpl->found_rec;
6632         rec->content_checked = tmpl->content_checked;
6633         rec->owner_ref_checked = tmpl->owner_ref_checked;
6634         rec->num_duplicates = 0;
6635         rec->metadata = tmpl->metadata;
6636         rec->flag_block_full_backref = FLAG_UNSET;
6637         rec->bad_full_backref = 0;
6638         rec->crossing_stripes = 0;
6639         rec->wrong_chunk_type = 0;
6640         rec->is_root = tmpl->is_root;
6641         rec->refs = tmpl->refs;
6642         rec->extent_item_refs = tmpl->extent_item_refs;
6643         rec->parent_generation = tmpl->parent_generation;
6644         INIT_LIST_HEAD(&rec->backrefs);
6645         INIT_LIST_HEAD(&rec->dups);
6646         INIT_LIST_HEAD(&rec->list);
6647         rec->backref_tree = RB_ROOT;
6648         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6649         rec->cache.start = tmpl->start;
6650         rec->cache.size = tmpl->nr;
6651         ret = insert_cache_extent(extent_cache, &rec->cache);
6652         if (ret) {
6653                 free(rec);
6654                 return ret;
6655         }
6656         bytes_used += rec->nr;
6657
6658         if (tmpl->metadata)
6659                 rec->crossing_stripes = check_crossing_stripes(global_info,
6660                                 rec->start, global_info->nodesize);
6661         check_extent_type(rec);
6662         return ret;
6663 }
6664
6665 /*
6666  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6667  * some are hints:
6668  * - refs              - if found, increase refs
6669  * - is_root           - if found, set
6670  * - content_checked   - if found, set
6671  * - owner_ref_checked - if found, set
6672  *
6673  * If not found, create a new one, initialize and insert.
6674  */
6675 static int add_extent_rec(struct cache_tree *extent_cache,
6676                 struct extent_record *tmpl)
6677 {
6678         struct extent_record *rec;
6679         struct cache_extent *cache;
6680         int ret = 0;
6681         int dup = 0;
6682
6683         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6684         if (cache) {
6685                 rec = container_of(cache, struct extent_record, cache);
6686                 if (tmpl->refs)
6687                         rec->refs++;
6688                 if (rec->nr == 1)
6689                         rec->nr = max(tmpl->nr, tmpl->max_size);
6690
6691                 /*
6692                  * We need to make sure to reset nr to whatever the extent
6693                  * record says was the real size, this way we can compare it to
6694                  * the backrefs.
6695                  */
6696                 if (tmpl->found_rec) {
6697                         if (tmpl->start != rec->start || rec->found_rec) {
6698                                 struct extent_record *tmp;
6699
6700                                 dup = 1;
6701                                 if (list_empty(&rec->list))
6702                                         list_add_tail(&rec->list,
6703                                                       &duplicate_extents);
6704
6705                                 /*
6706                                  * We have to do this song and dance in case we
6707                                  * find an extent record that falls inside of
6708                                  * our current extent record but does not have
6709                                  * the same objectid.
6710                                  */
6711                                 tmp = malloc(sizeof(*tmp));
6712                                 if (!tmp)
6713                                         return -ENOMEM;
6714                                 tmp->start = tmpl->start;
6715                                 tmp->max_size = tmpl->max_size;
6716                                 tmp->nr = tmpl->nr;
6717                                 tmp->found_rec = 1;
6718                                 tmp->metadata = tmpl->metadata;
6719                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6720                                 INIT_LIST_HEAD(&tmp->list);
6721                                 list_add_tail(&tmp->list, &rec->dups);
6722                                 rec->num_duplicates++;
6723                         } else {
6724                                 rec->nr = tmpl->nr;
6725                                 rec->found_rec = 1;
6726                         }
6727                 }
6728
6729                 if (tmpl->extent_item_refs && !dup) {
6730                         if (rec->extent_item_refs) {
6731                                 fprintf(stderr, "block %llu rec "
6732                                         "extent_item_refs %llu, passed %llu\n",
6733                                         (unsigned long long)tmpl->start,
6734                                         (unsigned long long)
6735                                                         rec->extent_item_refs,
6736                                         (unsigned long long)tmpl->extent_item_refs);
6737                         }
6738                         rec->extent_item_refs = tmpl->extent_item_refs;
6739                 }
6740                 if (tmpl->is_root)
6741                         rec->is_root = 1;
6742                 if (tmpl->content_checked)
6743                         rec->content_checked = 1;
6744                 if (tmpl->owner_ref_checked)
6745                         rec->owner_ref_checked = 1;
6746                 memcpy(&rec->parent_key, &tmpl->parent_key,
6747                                 sizeof(tmpl->parent_key));
6748                 if (tmpl->parent_generation)
6749                         rec->parent_generation = tmpl->parent_generation;
6750                 if (rec->max_size < tmpl->max_size)
6751                         rec->max_size = tmpl->max_size;
6752
6753                 /*
6754                  * A metadata extent can't cross stripe_len boundary, otherwise
6755                  * kernel scrub won't be able to handle it.
6756                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6757                  * it.
6758                  */
6759                 if (tmpl->metadata)
6760                         rec->crossing_stripes = check_crossing_stripes(
6761                                         global_info, rec->start,
6762                                         global_info->nodesize);
6763                 check_extent_type(rec);
6764                 maybe_free_extent_rec(extent_cache, rec);
6765                 return ret;
6766         }
6767
6768         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6769
6770         return ret;
6771 }
6772
6773 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6774                             u64 parent, u64 root, int found_ref)
6775 {
6776         struct extent_record *rec;
6777         struct tree_backref *back;
6778         struct cache_extent *cache;
6779         int ret;
6780         bool insert = false;
6781
6782         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6783         if (!cache) {
6784                 struct extent_record tmpl;
6785
6786                 memset(&tmpl, 0, sizeof(tmpl));
6787                 tmpl.start = bytenr;
6788                 tmpl.nr = 1;
6789                 tmpl.metadata = 1;
6790                 tmpl.max_size = 1;
6791
6792                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6793                 if (ret)
6794                         return ret;
6795
6796                 /* really a bug in cache_extent implement now */
6797                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6798                 if (!cache)
6799                         return -ENOENT;
6800         }
6801
6802         rec = container_of(cache, struct extent_record, cache);
6803         if (rec->start != bytenr) {
6804                 /*
6805                  * Several cause, from unaligned bytenr to over lapping extents
6806                  */
6807                 return -EEXIST;
6808         }
6809
6810         back = find_tree_backref(rec, parent, root);
6811         if (!back) {
6812                 back = alloc_tree_backref(rec, parent, root);
6813                 if (!back)
6814                         return -ENOMEM;
6815                 insert = true;
6816         }
6817
6818         if (found_ref) {
6819                 if (back->node.found_ref) {
6820                         fprintf(stderr, "Extent back ref already exists "
6821                                 "for %llu parent %llu root %llu \n",
6822                                 (unsigned long long)bytenr,
6823                                 (unsigned long long)parent,
6824                                 (unsigned long long)root);
6825                 }
6826                 back->node.found_ref = 1;
6827         } else {
6828                 if (back->node.found_extent_tree) {
6829                         fprintf(stderr, "Extent back ref already exists "
6830                                 "for %llu parent %llu root %llu \n",
6831                                 (unsigned long long)bytenr,
6832                                 (unsigned long long)parent,
6833                                 (unsigned long long)root);
6834                 }
6835                 back->node.found_extent_tree = 1;
6836         }
6837         if (insert)
6838                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6839                         compare_extent_backref));
6840         check_extent_type(rec);
6841         maybe_free_extent_rec(extent_cache, rec);
6842         return 0;
6843 }
6844
6845 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6846                             u64 parent, u64 root, u64 owner, u64 offset,
6847                             u32 num_refs, int found_ref, u64 max_size)
6848 {
6849         struct extent_record *rec;
6850         struct data_backref *back;
6851         struct cache_extent *cache;
6852         int ret;
6853         bool insert = false;
6854
6855         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6856         if (!cache) {
6857                 struct extent_record tmpl;
6858
6859                 memset(&tmpl, 0, sizeof(tmpl));
6860                 tmpl.start = bytenr;
6861                 tmpl.nr = 1;
6862                 tmpl.max_size = max_size;
6863
6864                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6865                 if (ret)
6866                         return ret;
6867
6868                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6869                 if (!cache)
6870                         abort();
6871         }
6872
6873         rec = container_of(cache, struct extent_record, cache);
6874         if (rec->max_size < max_size)
6875                 rec->max_size = max_size;
6876
6877         /*
6878          * If found_ref is set then max_size is the real size and must match the
6879          * existing refs.  So if we have already found a ref then we need to
6880          * make sure that this ref matches the existing one, otherwise we need
6881          * to add a new backref so we can notice that the backrefs don't match
6882          * and we need to figure out who is telling the truth.  This is to
6883          * account for that awful fsync bug I introduced where we'd end up with
6884          * a btrfs_file_extent_item that would have its length include multiple
6885          * prealloc extents or point inside of a prealloc extent.
6886          */
6887         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6888                                  bytenr, max_size);
6889         if (!back) {
6890                 back = alloc_data_backref(rec, parent, root, owner, offset,
6891                                           max_size);
6892                 BUG_ON(!back);
6893                 insert = true;
6894         }
6895
6896         if (found_ref) {
6897                 BUG_ON(num_refs != 1);
6898                 if (back->node.found_ref)
6899                         BUG_ON(back->bytes != max_size);
6900                 back->node.found_ref = 1;
6901                 back->found_ref += 1;
6902                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6903                         back->bytes = max_size;
6904                         back->disk_bytenr = bytenr;
6905
6906                         /* Need to reinsert if not already in the tree */
6907                         if (!insert) {
6908                                 rb_erase(&back->node.node, &rec->backref_tree);
6909                                 insert = true;
6910                         }
6911                 }
6912                 rec->refs += 1;
6913                 rec->content_checked = 1;
6914                 rec->owner_ref_checked = 1;
6915         } else {
6916                 if (back->node.found_extent_tree) {
6917                         fprintf(stderr, "Extent back ref already exists "
6918                                 "for %llu parent %llu root %llu "
6919                                 "owner %llu offset %llu num_refs %lu\n",
6920                                 (unsigned long long)bytenr,
6921                                 (unsigned long long)parent,
6922                                 (unsigned long long)root,
6923                                 (unsigned long long)owner,
6924                                 (unsigned long long)offset,
6925                                 (unsigned long)num_refs);
6926                 }
6927                 back->num_refs = num_refs;
6928                 back->node.found_extent_tree = 1;
6929         }
6930         if (insert)
6931                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6932                         compare_extent_backref));
6933
6934         maybe_free_extent_rec(extent_cache, rec);
6935         return 0;
6936 }
6937
6938 static int add_pending(struct cache_tree *pending,
6939                        struct cache_tree *seen, u64 bytenr, u32 size)
6940 {
6941         int ret;
6942         ret = add_cache_extent(seen, bytenr, size);
6943         if (ret)
6944                 return ret;
6945         add_cache_extent(pending, bytenr, size);
6946         return 0;
6947 }
6948
6949 static int pick_next_pending(struct cache_tree *pending,
6950                         struct cache_tree *reada,
6951                         struct cache_tree *nodes,
6952                         u64 last, struct block_info *bits, int bits_nr,
6953                         int *reada_bits)
6954 {
6955         unsigned long node_start = last;
6956         struct cache_extent *cache;
6957         int ret;
6958
6959         cache = search_cache_extent(reada, 0);
6960         if (cache) {
6961                 bits[0].start = cache->start;
6962                 bits[0].size = cache->size;
6963                 *reada_bits = 1;
6964                 return 1;
6965         }
6966         *reada_bits = 0;
6967         if (node_start > 32768)
6968                 node_start -= 32768;
6969
6970         cache = search_cache_extent(nodes, node_start);
6971         if (!cache)
6972                 cache = search_cache_extent(nodes, 0);
6973
6974         if (!cache) {
6975                  cache = search_cache_extent(pending, 0);
6976                  if (!cache)
6977                          return 0;
6978                  ret = 0;
6979                  do {
6980                          bits[ret].start = cache->start;
6981                          bits[ret].size = cache->size;
6982                          cache = next_cache_extent(cache);
6983                          ret++;
6984                  } while (cache && ret < bits_nr);
6985                  return ret;
6986         }
6987
6988         ret = 0;
6989         do {
6990                 bits[ret].start = cache->start;
6991                 bits[ret].size = cache->size;
6992                 cache = next_cache_extent(cache);
6993                 ret++;
6994         } while (cache && ret < bits_nr);
6995
6996         if (bits_nr - ret > 8) {
6997                 u64 lookup = bits[0].start + bits[0].size;
6998                 struct cache_extent *next;
6999                 next = search_cache_extent(pending, lookup);
7000                 while(next) {
7001                         if (next->start - lookup > 32768)
7002                                 break;
7003                         bits[ret].start = next->start;
7004                         bits[ret].size = next->size;
7005                         lookup = next->start + next->size;
7006                         ret++;
7007                         if (ret == bits_nr)
7008                                 break;
7009                         next = next_cache_extent(next);
7010                         if (!next)
7011                                 break;
7012                 }
7013         }
7014         return ret;
7015 }
7016
7017 static void free_chunk_record(struct cache_extent *cache)
7018 {
7019         struct chunk_record *rec;
7020
7021         rec = container_of(cache, struct chunk_record, cache);
7022         list_del_init(&rec->list);
7023         list_del_init(&rec->dextents);
7024         free(rec);
7025 }
7026
7027 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7028 {
7029         cache_tree_free_extents(chunk_cache, free_chunk_record);
7030 }
7031
7032 static void free_device_record(struct rb_node *node)
7033 {
7034         struct device_record *rec;
7035
7036         rec = container_of(node, struct device_record, node);
7037         free(rec);
7038 }
7039
7040 FREE_RB_BASED_TREE(device_cache, free_device_record);
7041
7042 int insert_block_group_record(struct block_group_tree *tree,
7043                               struct block_group_record *bg_rec)
7044 {
7045         int ret;
7046
7047         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7048         if (ret)
7049                 return ret;
7050
7051         list_add_tail(&bg_rec->list, &tree->block_groups);
7052         return 0;
7053 }
7054
7055 static void free_block_group_record(struct cache_extent *cache)
7056 {
7057         struct block_group_record *rec;
7058
7059         rec = container_of(cache, struct block_group_record, cache);
7060         list_del_init(&rec->list);
7061         free(rec);
7062 }
7063
7064 void free_block_group_tree(struct block_group_tree *tree)
7065 {
7066         cache_tree_free_extents(&tree->tree, free_block_group_record);
7067 }
7068
7069 int insert_device_extent_record(struct device_extent_tree *tree,
7070                                 struct device_extent_record *de_rec)
7071 {
7072         int ret;
7073
7074         /*
7075          * Device extent is a bit different from the other extents, because
7076          * the extents which belong to the different devices may have the
7077          * same start and size, so we need use the special extent cache
7078          * search/insert functions.
7079          */
7080         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7081         if (ret)
7082                 return ret;
7083
7084         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7085         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7086         return 0;
7087 }
7088
7089 static void free_device_extent_record(struct cache_extent *cache)
7090 {
7091         struct device_extent_record *rec;
7092
7093         rec = container_of(cache, struct device_extent_record, cache);
7094         if (!list_empty(&rec->chunk_list))
7095                 list_del_init(&rec->chunk_list);
7096         if (!list_empty(&rec->device_list))
7097                 list_del_init(&rec->device_list);
7098         free(rec);
7099 }
7100
7101 void free_device_extent_tree(struct device_extent_tree *tree)
7102 {
7103         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7104 }
7105
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item found in @leaf at @slot.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref; anything else is recorded as a data backref carrying the ref
 * count stored in the item.
 *
 * Returns whatever add_tree_backref() / add_data_backref() returned.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid, key.offset,
                                0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
7126
7127 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7128                                             struct btrfs_key *key,
7129                                             int slot)
7130 {
7131         struct btrfs_chunk *ptr;
7132         struct chunk_record *rec;
7133         int num_stripes, i;
7134
7135         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7136         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7137
7138         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7139         if (!rec) {
7140                 fprintf(stderr, "memory allocation failed\n");
7141                 exit(-1);
7142         }
7143
7144         INIT_LIST_HEAD(&rec->list);
7145         INIT_LIST_HEAD(&rec->dextents);
7146         rec->bg_rec = NULL;
7147
7148         rec->cache.start = key->offset;
7149         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7150
7151         rec->generation = btrfs_header_generation(leaf);
7152
7153         rec->objectid = key->objectid;
7154         rec->type = key->type;
7155         rec->offset = key->offset;
7156
7157         rec->length = rec->cache.size;
7158         rec->owner = btrfs_chunk_owner(leaf, ptr);
7159         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7160         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7161         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7162         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7163         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7164         rec->num_stripes = num_stripes;
7165         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7166
7167         for (i = 0; i < rec->num_stripes; ++i) {
7168                 rec->stripes[i].devid =
7169                         btrfs_stripe_devid_nr(leaf, ptr, i);
7170                 rec->stripes[i].offset =
7171                         btrfs_stripe_offset_nr(leaf, ptr, i);
7172                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7173                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7174                                 BTRFS_UUID_SIZE);
7175         }
7176
7177         return rec;
7178 }
7179
7180 static int process_chunk_item(struct cache_tree *chunk_cache,
7181                               struct btrfs_key *key, struct extent_buffer *eb,
7182                               int slot)
7183 {
7184         struct chunk_record *rec;
7185         struct btrfs_chunk *chunk;
7186         int ret = 0;
7187
7188         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7189         /*
7190          * Do extra check for this chunk item,
7191          *
7192          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7193          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7194          * and owner<->key_type check.
7195          */
7196         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7197                                       key->offset);
7198         if (ret < 0) {
7199                 error("chunk(%llu, %llu) is not valid, ignore it",
7200                       key->offset, btrfs_chunk_length(eb, chunk));
7201                 return 0;
7202         }
7203         rec = btrfs_new_chunk_record(eb, key, slot);
7204         ret = insert_cache_extent(chunk_cache, &rec->cache);
7205         if (ret) {
7206                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7207                         rec->offset, rec->length);
7208                 free(rec);
7209         }
7210
7211         return ret;
7212 }
7213
7214 static int process_device_item(struct rb_root *dev_cache,
7215                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7216 {
7217         struct btrfs_dev_item *ptr;
7218         struct device_record *rec;
7219         int ret = 0;
7220
7221         ptr = btrfs_item_ptr(eb,
7222                 slot, struct btrfs_dev_item);
7223
7224         rec = malloc(sizeof(*rec));
7225         if (!rec) {
7226                 fprintf(stderr, "memory allocation failed\n");
7227                 return -ENOMEM;
7228         }
7229
7230         rec->devid = key->offset;
7231         rec->generation = btrfs_header_generation(eb);
7232
7233         rec->objectid = key->objectid;
7234         rec->type = key->type;
7235         rec->offset = key->offset;
7236
7237         rec->devid = btrfs_device_id(eb, ptr);
7238         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7239         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7240
7241         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7242         if (ret) {
7243                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7244                 free(rec);
7245         }
7246
7247         return ret;
7248 }
7249
7250 struct block_group_record *
7251 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7252                              int slot)
7253 {
7254         struct btrfs_block_group_item *ptr;
7255         struct block_group_record *rec;
7256
7257         rec = calloc(1, sizeof(*rec));
7258         if (!rec) {
7259                 fprintf(stderr, "memory allocation failed\n");
7260                 exit(-1);
7261         }
7262
7263         rec->cache.start = key->objectid;
7264         rec->cache.size = key->offset;
7265
7266         rec->generation = btrfs_header_generation(leaf);
7267
7268         rec->objectid = key->objectid;
7269         rec->type = key->type;
7270         rec->offset = key->offset;
7271
7272         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7273         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7274
7275         INIT_LIST_HEAD(&rec->list);
7276
7277         return rec;
7278 }
7279
7280 static int process_block_group_item(struct block_group_tree *block_group_cache,
7281                                     struct btrfs_key *key,
7282                                     struct extent_buffer *eb, int slot)
7283 {
7284         struct block_group_record *rec;
7285         int ret = 0;
7286
7287         rec = btrfs_new_block_group_record(eb, key, slot);
7288         ret = insert_block_group_record(block_group_cache, rec);
7289         if (ret) {
7290                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7291                         rec->objectid, rec->offset);
7292                 free(rec);
7293         }
7294
7295         return ret;
7296 }
7297
7298 struct device_extent_record *
7299 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7300                                struct btrfs_key *key, int slot)
7301 {
7302         struct device_extent_record *rec;
7303         struct btrfs_dev_extent *ptr;
7304
7305         rec = calloc(1, sizeof(*rec));
7306         if (!rec) {
7307                 fprintf(stderr, "memory allocation failed\n");
7308                 exit(-1);
7309         }
7310
7311         rec->cache.objectid = key->objectid;
7312         rec->cache.start = key->offset;
7313
7314         rec->generation = btrfs_header_generation(leaf);
7315
7316         rec->objectid = key->objectid;
7317         rec->type = key->type;
7318         rec->offset = key->offset;
7319
7320         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7321         rec->chunk_objecteid =
7322                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7323         rec->chunk_offset =
7324                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7325         rec->length = btrfs_dev_extent_length(leaf, ptr);
7326         rec->cache.size = rec->length;
7327
7328         INIT_LIST_HEAD(&rec->chunk_list);
7329         INIT_LIST_HEAD(&rec->device_list);
7330
7331         return rec;
7332 }
7333
7334 static int
7335 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7336                            struct btrfs_key *key, struct extent_buffer *eb,
7337                            int slot)
7338 {
7339         struct device_extent_record *rec;
7340         int ret;
7341
7342         rec = btrfs_new_device_extent_record(eb, key, slot);
7343         ret = insert_device_extent_record(dev_extent_cache, rec);
7344         if (ret) {
7345                 fprintf(stderr,
7346                         "Device extent[%llu, %llu, %llu] existed.\n",
7347                         rec->objectid, rec->offset, rec->length);
7348                 free(rec);
7349         }
7350
7351         return ret;
7352 }
7353
7354 static int process_extent_item(struct btrfs_root *root,
7355                                struct cache_tree *extent_cache,
7356                                struct extent_buffer *eb, int slot)
7357 {
7358         struct btrfs_extent_item *ei;
7359         struct btrfs_extent_inline_ref *iref;
7360         struct btrfs_extent_data_ref *dref;
7361         struct btrfs_shared_data_ref *sref;
7362         struct btrfs_key key;
7363         struct extent_record tmpl;
7364         unsigned long end;
7365         unsigned long ptr;
7366         int ret;
7367         int type;
7368         u32 item_size = btrfs_item_size_nr(eb, slot);
7369         u64 refs = 0;
7370         u64 offset;
7371         u64 num_bytes;
7372         int metadata = 0;
7373
7374         btrfs_item_key_to_cpu(eb, &key, slot);
7375
7376         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7377                 metadata = 1;
7378                 num_bytes = root->fs_info->nodesize;
7379         } else {
7380                 num_bytes = key.offset;
7381         }
7382
7383         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7384                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7385                       key.objectid, root->fs_info->sectorsize);
7386                 return -EIO;
7387         }
7388         if (item_size < sizeof(*ei)) {
7389 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7390                 struct btrfs_extent_item_v0 *ei0;
7391                 BUG_ON(item_size != sizeof(*ei0));
7392                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7393                 refs = btrfs_extent_refs_v0(eb, ei0);
7394 #else
7395                 BUG();
7396 #endif
7397                 memset(&tmpl, 0, sizeof(tmpl));
7398                 tmpl.start = key.objectid;
7399                 tmpl.nr = num_bytes;
7400                 tmpl.extent_item_refs = refs;
7401                 tmpl.metadata = metadata;
7402                 tmpl.found_rec = 1;
7403                 tmpl.max_size = num_bytes;
7404
7405                 return add_extent_rec(extent_cache, &tmpl);
7406         }
7407
7408         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7409         refs = btrfs_extent_refs(eb, ei);
7410         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7411                 metadata = 1;
7412         else
7413                 metadata = 0;
7414         if (metadata && num_bytes != root->fs_info->nodesize) {
7415                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7416                       num_bytes, root->fs_info->nodesize);
7417                 return -EIO;
7418         }
7419         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7420                 error("ignore invalid data extent, length %llu is not aligned to %u",
7421                       num_bytes, root->fs_info->sectorsize);
7422                 return -EIO;
7423         }
7424
7425         memset(&tmpl, 0, sizeof(tmpl));
7426         tmpl.start = key.objectid;
7427         tmpl.nr = num_bytes;
7428         tmpl.extent_item_refs = refs;
7429         tmpl.metadata = metadata;
7430         tmpl.found_rec = 1;
7431         tmpl.max_size = num_bytes;
7432         add_extent_rec(extent_cache, &tmpl);
7433
7434         ptr = (unsigned long)(ei + 1);
7435         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7436             key.type == BTRFS_EXTENT_ITEM_KEY)
7437                 ptr += sizeof(struct btrfs_tree_block_info);
7438
7439         end = (unsigned long)ei + item_size;
7440         while (ptr < end) {
7441                 iref = (struct btrfs_extent_inline_ref *)ptr;
7442                 type = btrfs_extent_inline_ref_type(eb, iref);
7443                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7444                 switch (type) {
7445                 case BTRFS_TREE_BLOCK_REF_KEY:
7446                         ret = add_tree_backref(extent_cache, key.objectid,
7447                                         0, offset, 0);
7448                         if (ret < 0)
7449                                 error(
7450                         "add_tree_backref failed (extent items tree block): %s",
7451                                       strerror(-ret));
7452                         break;
7453                 case BTRFS_SHARED_BLOCK_REF_KEY:
7454                         ret = add_tree_backref(extent_cache, key.objectid,
7455                                         offset, 0, 0);
7456                         if (ret < 0)
7457                                 error(
7458                         "add_tree_backref failed (extent items shared block): %s",
7459                                       strerror(-ret));
7460                         break;
7461                 case BTRFS_EXTENT_DATA_REF_KEY:
7462                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7463                         add_data_backref(extent_cache, key.objectid, 0,
7464                                         btrfs_extent_data_ref_root(eb, dref),
7465                                         btrfs_extent_data_ref_objectid(eb,
7466                                                                        dref),
7467                                         btrfs_extent_data_ref_offset(eb, dref),
7468                                         btrfs_extent_data_ref_count(eb, dref),
7469                                         0, num_bytes);
7470                         break;
7471                 case BTRFS_SHARED_DATA_REF_KEY:
7472                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7473                         add_data_backref(extent_cache, key.objectid, offset,
7474                                         0, 0, 0,
7475                                         btrfs_shared_data_ref_count(eb, sref),
7476                                         0, num_bytes);
7477                         break;
7478                 default:
7479                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7480                                 key.objectid, key.type, num_bytes);
7481                         goto out;
7482                 }
7483                 ptr += btrfs_extent_inline_ref_size(type);
7484         }
7485         WARN_ON(ptr > end);
7486 out:
7487         return 0;
7488 }
7489
/*
 * Check that the range [offset, offset + bytes) of block group @cache is
 * described by exactly one entry in cache->free_space_ctl.
 *
 * Before the lookup, the range is trimmed against the logical ranges that
 * btrfs_rmap_block() reports for every super block mirror offset (each
 * reported range is stripe_len bytes long).  Depending on where such a range
 * overlaps, the front of the range is cut off, the tail is cut off, or the
 * part to its left is verified through a recursive call and checking
 * continues with the part to its right.  If such a range covers everything
 * that is left, there is nothing to check and 0 is returned.
 *
 * On an exact match the free space entry is unlinked from the ctl and freed,
 * so entries that remain afterwards were not accounted for by any call.
 *
 * Returns 0 on success, the error from btrfs_rmap_block() or from the
 * recursive call, or -EINVAL when no entry is found or its offset/size do
 * not match the wanted range.
 */
static int check_cache_range(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache,
                             u64 offset, u64 bytes)
{
        struct btrfs_free_space *entry;
        u64 *logical;
        u64 bytenr;
        int stripe_len;
        int i, nr, ret;

        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                bytenr = btrfs_sb_offset(i);
                ret = btrfs_rmap_block(root->fs_info,
                                       cache->key.objectid, bytenr, 0,
                                       &logical, &nr, &stripe_len);
                if (ret)
                        return ret;

                /* Walk the returned addresses from the last one to the first */
                while (nr--) {
                        /* Stripe ends at or before our range: no overlap */
                        if (logical[nr] + stripe_len <= offset)
                                continue;
                        /* Stripe starts at or after our range end: no overlap */
                        if (offset + bytes <= logical[nr])
                                continue;
                        if (logical[nr] == offset) {
                                /* Stripe starts exactly where the range starts */
                                if (stripe_len >= bytes) {
                                        free(logical);
                                        return 0;
                                }
                                /* Skip over the stripe at the front */
                                bytes -= stripe_len;
                                offset += stripe_len;
                        } else if (logical[nr] < offset) {
                                /* Stripe starts before the range and reaches into it */
                                if (logical[nr] + stripe_len >=
                                    offset + bytes) {
                                        free(logical);
                                        return 0;
                                }
                                /* Keep only what lies behind the stripe */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        } else {
                                /*
                                 * Could be tricky, the super may land in the
                                 * middle of the area we're checking.  First
                                 * check the easiest case, it's at the end.
                                 */
                                if (logical[nr] + stripe_len >=
                                    bytes + offset) {
                                        bytes = logical[nr] - offset;
                                        continue;
                                }

                                /* Check the left side */
                                ret = check_cache_range(root, cache,
                                                        offset,
                                                        logical[nr] - offset);
                                if (ret) {
                                        free(logical);
                                        return ret;
                                }

                                /* Now we continue with the right side */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        }
                }

                /* The address array is released after each mirror is handled */
                free(logical);
        }

        /* What is left of the range must match one free space entry exactly */
        entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
        if (!entry) {
                fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
                        offset, offset+bytes);
                return -EINVAL;
        }

        if (entry->offset != offset) {
                fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
                        entry->offset);
                return -EINVAL;
        }

        if (entry->bytes != bytes) {
                fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
                        bytes, entry->bytes, offset);
                return -EINVAL;
        }

        /* Verified: remove the entry so leftovers can be detected later */
        unlink_free_space(cache->free_space_ctl, entry);
        free(entry);
        return 0;
}
7583
7584 static int verify_space_cache(struct btrfs_root *root,
7585                               struct btrfs_block_group_cache *cache)
7586 {
7587         struct btrfs_path path;
7588         struct extent_buffer *leaf;
7589         struct btrfs_key key;
7590         u64 last;
7591         int ret = 0;
7592
7593         root = root->fs_info->extent_root;
7594
7595         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7596
7597         btrfs_init_path(&path);
7598         key.objectid = last;
7599         key.offset = 0;
7600         key.type = BTRFS_EXTENT_ITEM_KEY;
7601         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7602         if (ret < 0)
7603                 goto out;
7604         ret = 0;
7605         while (1) {
7606                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7607                         ret = btrfs_next_leaf(root, &path);
7608                         if (ret < 0)
7609                                 goto out;
7610                         if (ret > 0) {
7611                                 ret = 0;
7612                                 break;
7613                         }
7614                 }
7615                 leaf = path.nodes[0];
7616                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7617                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7618                         break;
7619                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7620                     key.type != BTRFS_METADATA_ITEM_KEY) {
7621                         path.slots[0]++;
7622                         continue;
7623                 }
7624
7625                 if (last == key.objectid) {
7626                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7627                                 last = key.objectid + key.offset;
7628                         else
7629                                 last = key.objectid + root->fs_info->nodesize;
7630                         path.slots[0]++;
7631                         continue;
7632                 }
7633
7634                 ret = check_cache_range(root, cache, last,
7635                                         key.objectid - last);
7636                 if (ret)
7637                         break;
7638                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7639                         last = key.objectid + key.offset;
7640                 else
7641                         last = key.objectid + root->fs_info->nodesize;
7642                 path.slots[0]++;
7643         }
7644
7645         if (last < cache->key.objectid + cache->key.offset)
7646                 ret = check_cache_range(root, cache, last,
7647                                         cache->key.objectid +
7648                                         cache->key.offset - last);
7649
7650 out:
7651         btrfs_release_path(&path);
7652
7653         if (!ret &&
7654             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7655                 fprintf(stderr, "There are still entries left in the space "
7656                         "cache\n");
7657                 ret = -EINVAL;
7658         }
7659
7660         return ret;
7661 }
7662
7663 static int check_space_cache(struct btrfs_root *root)
7664 {
7665         struct btrfs_block_group_cache *cache;
7666         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7667         int ret;
7668         int error = 0;
7669
7670         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7671             btrfs_super_generation(root->fs_info->super_copy) !=
7672             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7673                 printf("cache and super generation don't match, space cache "
7674                        "will be invalidated\n");
7675                 return 0;
7676         }
7677
7678         if (ctx.progress_enabled) {
7679                 ctx.tp = TASK_FREE_SPACE;
7680                 task_start(ctx.info);
7681         }
7682
7683         while (1) {
7684                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7685                 if (!cache)
7686                         break;
7687
7688                 start = cache->key.objectid + cache->key.offset;
7689                 if (!cache->free_space_ctl) {
7690                         if (btrfs_init_free_space_ctl(cache,
7691                                                 root->fs_info->sectorsize)) {
7692                                 ret = -ENOMEM;
7693                                 break;
7694                         }
7695                 } else {
7696                         btrfs_remove_free_space_cache(cache);
7697                 }
7698
7699                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7700                         ret = exclude_super_stripes(root, cache);
7701                         if (ret) {
7702                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7703                                         strerror(-ret));
7704                                 error++;
7705                                 continue;
7706                         }
7707                         ret = load_free_space_tree(root->fs_info, cache);
7708                         free_excluded_extents(root, cache);
7709                         if (ret < 0) {
7710                                 fprintf(stderr, "could not load free space tree: %s\n",
7711                                         strerror(-ret));
7712                                 error++;
7713                                 continue;
7714                         }
7715                         error += ret;
7716                 } else {
7717                         ret = load_free_space_cache(root->fs_info, cache);
7718                         if (!ret)
7719                                 continue;
7720                 }
7721
7722                 ret = verify_space_cache(root, cache);
7723                 if (ret) {
7724                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7725                                 cache->key.objectid);
7726                         error++;
7727                 }
7728         }
7729
7730         task_stop(ctx.info);
7731
7732         return error ? -EINVAL : 0;
7733 }
7734
7735 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7736                         u64 num_bytes, unsigned long leaf_offset,
7737                         struct extent_buffer *eb) {
7738
7739         struct btrfs_fs_info *fs_info = root->fs_info;
7740         u64 offset = 0;
7741         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7742         char *data;
7743         unsigned long csum_offset;
7744         u32 csum;
7745         u32 csum_expected;
7746         u64 read_len;
7747         u64 data_checked = 0;
7748         u64 tmp;
7749         int ret = 0;
7750         int mirror;
7751         int num_copies;
7752
7753         if (num_bytes % fs_info->sectorsize)
7754                 return -EINVAL;
7755
7756         data = malloc(num_bytes);
7757         if (!data)
7758                 return -ENOMEM;
7759
7760         while (offset < num_bytes) {
7761                 mirror = 0;
7762 again:
7763                 read_len = num_bytes - offset;
7764                 /* read as much space once a time */
7765                 ret = read_extent_data(fs_info, data + offset,
7766                                 bytenr + offset, &read_len, mirror);
7767                 if (ret)
7768                         goto out;
7769                 data_checked = 0;
7770                 /* verify every 4k data's checksum */
7771                 while (data_checked < read_len) {
7772                         csum = ~(u32)0;
7773                         tmp = offset + data_checked;
7774
7775                         csum = btrfs_csum_data((char *)data + tmp,
7776                                                csum, fs_info->sectorsize);
7777                         btrfs_csum_final(csum, (u8 *)&csum);
7778
7779                         csum_offset = leaf_offset +
7780                                  tmp / fs_info->sectorsize * csum_size;
7781                         read_extent_buffer(eb, (char *)&csum_expected,
7782                                            csum_offset, csum_size);
7783                         /* try another mirror */
7784                         if (csum != csum_expected) {
7785                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7786                                                 mirror, bytenr + tmp,
7787                                                 csum, csum_expected);
7788                                 num_copies = btrfs_num_copies(root->fs_info,
7789                                                 bytenr, num_bytes);
7790                                 if (mirror < num_copies - 1) {
7791                                         mirror += 1;
7792                                         goto again;
7793                                 }
7794                         }
7795                         data_checked += fs_info->sectorsize;
7796                 }
7797                 offset += read_len;
7798         }
7799 out:
7800         free(data);
7801         return ret;
7802 }
7803
7804 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7805                                u64 num_bytes)
7806 {
7807         struct btrfs_path path;
7808         struct extent_buffer *leaf;
7809         struct btrfs_key key;
7810         int ret;
7811
7812         btrfs_init_path(&path);
7813         key.objectid = bytenr;
7814         key.type = BTRFS_EXTENT_ITEM_KEY;
7815         key.offset = (u64)-1;
7816
7817 again:
7818         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7819                                 0, 0);
7820         if (ret < 0) {
7821                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7822                 btrfs_release_path(&path);
7823                 return ret;
7824         } else if (ret) {
7825                 if (path.slots[0] > 0) {
7826                         path.slots[0]--;
7827                 } else {
7828                         ret = btrfs_prev_leaf(root, &path);
7829                         if (ret < 0) {
7830                                 goto out;
7831                         } else if (ret > 0) {
7832                                 ret = 0;
7833                                 goto out;
7834                         }
7835                 }
7836         }
7837
7838         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7839
7840         /*
7841          * Block group items come before extent items if they have the same
7842          * bytenr, so walk back one more just in case.  Dear future traveller,
7843          * first congrats on mastering time travel.  Now if it's not too much
7844          * trouble could you go back to 2006 and tell Chris to make the
7845          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7846          * EXTENT_ITEM_KEY please?
7847          */
7848         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7849                 if (path.slots[0] > 0) {
7850                         path.slots[0]--;
7851                 } else {
7852                         ret = btrfs_prev_leaf(root, &path);
7853                         if (ret < 0) {
7854                                 goto out;
7855                         } else if (ret > 0) {
7856                                 ret = 0;
7857                                 goto out;
7858                         }
7859                 }
7860                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7861         }
7862
7863         while (num_bytes) {
7864                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7865                         ret = btrfs_next_leaf(root, &path);
7866                         if (ret < 0) {
7867                                 fprintf(stderr, "Error going to next leaf "
7868                                         "%d\n", ret);
7869                                 btrfs_release_path(&path);
7870                                 return ret;
7871                         } else if (ret) {
7872                                 break;
7873                         }
7874                 }
7875                 leaf = path.nodes[0];
7876                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7877                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7878                         path.slots[0]++;
7879                         continue;
7880                 }
7881                 if (key.objectid + key.offset < bytenr) {
7882                         path.slots[0]++;
7883                         continue;
7884                 }
7885                 if (key.objectid > bytenr + num_bytes)
7886                         break;
7887
7888                 if (key.objectid == bytenr) {
7889                         if (key.offset >= num_bytes) {
7890                                 num_bytes = 0;
7891                                 break;
7892                         }
7893                         num_bytes -= key.offset;
7894                         bytenr += key.offset;
7895                 } else if (key.objectid < bytenr) {
7896                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7897                                 num_bytes = 0;
7898                                 break;
7899                         }
7900                         num_bytes = (bytenr + num_bytes) -
7901                                 (key.objectid + key.offset);
7902                         bytenr = key.objectid + key.offset;
7903                 } else {
7904                         if (key.objectid + key.offset < bytenr + num_bytes) {
7905                                 u64 new_start = key.objectid + key.offset;
7906                                 u64 new_bytes = bytenr + num_bytes - new_start;
7907
7908                                 /*
7909                                  * Weird case, the extent is in the middle of
7910                                  * our range, we'll have to search one side
7911                                  * and then the other.  Not sure if this happens
7912                                  * in real life, but no harm in coding it up
7913                                  * anyway just in case.
7914                                  */
7915                                 btrfs_release_path(&path);
7916                                 ret = check_extent_exists(root, new_start,
7917                                                           new_bytes);
7918                                 if (ret) {
7919                                         fprintf(stderr, "Right section didn't "
7920                                                 "have a record\n");
7921                                         break;
7922                                 }
7923                                 num_bytes = key.objectid - bytenr;
7924                                 goto again;
7925                         }
7926                         num_bytes = key.objectid - bytenr;
7927                 }
7928                 path.slots[0]++;
7929         }
7930         ret = 0;
7931
7932 out:
7933         if (num_bytes && !ret) {
7934                 fprintf(stderr, "There are no extents for csum range "
7935                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7936                 ret = 1;
7937         }
7938
7939         btrfs_release_path(&path);
7940         return ret;
7941 }
7942
7943 static int check_csums(struct btrfs_root *root)
7944 {
7945         struct btrfs_path path;
7946         struct extent_buffer *leaf;
7947         struct btrfs_key key;
7948         u64 offset = 0, num_bytes = 0;
7949         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7950         int errors = 0;
7951         int ret;
7952         u64 data_len;
7953         unsigned long leaf_offset;
7954
7955         root = root->fs_info->csum_root;
7956         if (!extent_buffer_uptodate(root->node)) {
7957                 fprintf(stderr, "No valid csum tree found\n");
7958                 return -ENOENT;
7959         }
7960
7961         btrfs_init_path(&path);
7962         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7963         key.type = BTRFS_EXTENT_CSUM_KEY;
7964         key.offset = 0;
7965         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7966         if (ret < 0) {
7967                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7968                 btrfs_release_path(&path);
7969                 return ret;
7970         }
7971
7972         if (ret > 0 && path.slots[0])
7973                 path.slots[0]--;
7974         ret = 0;
7975
7976         while (1) {
7977                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7978                         ret = btrfs_next_leaf(root, &path);
7979                         if (ret < 0) {
7980                                 fprintf(stderr, "Error going to next leaf "
7981                                         "%d\n", ret);
7982                                 break;
7983                         }
7984                         if (ret)
7985                                 break;
7986                 }
7987                 leaf = path.nodes[0];
7988
7989                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7990                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7991                         path.slots[0]++;
7992                         continue;
7993                 }
7994
7995                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7996                               csum_size) * root->fs_info->sectorsize;
7997                 if (!check_data_csum)
7998                         goto skip_csum_check;
7999                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8000                 ret = check_extent_csums(root, key.offset, data_len,
8001                                          leaf_offset, leaf);
8002                 if (ret)
8003                         break;
8004 skip_csum_check:
8005                 if (!num_bytes) {
8006                         offset = key.offset;
8007                 } else if (key.offset != offset + num_bytes) {
8008                         ret = check_extent_exists(root, offset, num_bytes);
8009                         if (ret) {
8010                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8011                                         "there is no extent record\n",
8012                                         offset, offset+num_bytes);
8013                                 errors++;
8014                         }
8015                         offset = key.offset;
8016                         num_bytes = 0;
8017                 }
8018                 num_bytes += data_len;
8019                 path.slots[0]++;
8020         }
8021
8022         btrfs_release_path(&path);
8023         return errors;
8024 }
8025
8026 static int is_dropped_key(struct btrfs_key *key,
8027                           struct btrfs_key *drop_key) {
8028         if (key->objectid < drop_key->objectid)
8029                 return 1;
8030         else if (key->objectid == drop_key->objectid) {
8031                 if (key->type < drop_key->type)
8032                         return 1;
8033                 else if (key->type == drop_key->type) {
8034                         if (key->offset < drop_key->offset)
8035                                 return 1;
8036                 }
8037         }
8038         return 0;
8039 }
8040
8041 /*
8042  * Here are the rules for FULL_BACKREF.
8043  *
8044  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8045  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8046  *      FULL_BACKREF set.
8047  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8048  *    if it happened after the relocation occurred since we'll have dropped the
8049  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8050  *    have no real way to know for sure.
8051  *
8052  * We process the blocks one root at a time, and we start from the lowest root
8053  * objectid and go to the highest.  So we can just lookup the owner backref for
8054  * the record and if we don't find it then we know it doesn't exist and we have
8055  * a FULL BACKREF.
8056  *
8057  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8058  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8059  * be set or not and then we can check later once we've gathered all the refs.
8060  */
8061 static int calc_extent_flag(struct cache_tree *extent_cache,
8062                            struct extent_buffer *buf,
8063                            struct root_item_record *ri,
8064                            u64 *flags)
8065 {
8066         struct extent_record *rec;
8067         struct cache_extent *cache;
8068         struct tree_backref *tback;
8069         u64 owner = 0;
8070
8071         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8072         /* we have added this extent before */
8073         if (!cache)
8074                 return -ENOENT;
8075
8076         rec = container_of(cache, struct extent_record, cache);
8077
8078         /*
8079          * Except file/reloc tree, we can not have
8080          * FULL BACKREF MODE
8081          */
8082         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8083                 goto normal;
8084         /*
8085          * root node
8086          */
8087         if (buf->start == ri->bytenr)
8088                 goto normal;
8089
8090         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8091                 goto full_backref;
8092
8093         owner = btrfs_header_owner(buf);
8094         if (owner == ri->objectid)
8095                 goto normal;
8096
8097         tback = find_tree_backref(rec, 0, owner);
8098         if (!tback)
8099                 goto full_backref;
8100 normal:
8101         *flags = 0;
8102         if (rec->flag_block_full_backref != FLAG_UNSET &&
8103             rec->flag_block_full_backref != 0)
8104                 rec->bad_full_backref = 1;
8105         return 0;
8106 full_backref:
8107         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8108         if (rec->flag_block_full_backref != FLAG_UNSET &&
8109             rec->flag_block_full_backref != 1)
8110                 rec->bad_full_backref = 1;
8111         return 0;
8112 }
8113
8114 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8115 {
8116         fprintf(stderr, "Invalid key type(");
8117         print_key_type(stderr, 0, key_type);
8118         fprintf(stderr, ") found in root(");
8119         print_objectid(stderr, rootid, 0);
8120         fprintf(stderr, ")\n");
8121 }
8122
8123 /*
8124  * Check if the key is valid with its extent buffer.
8125  *
8126  * This is a early check in case invalid key exists in a extent buffer
8127  * This is not comprehensive yet, but should prevent wrong key/item passed
8128  * further
8129  */
8130 static int check_type_with_root(u64 rootid, u8 key_type)
8131 {
8132         switch (key_type) {
8133         /* Only valid in chunk tree */
8134         case BTRFS_DEV_ITEM_KEY:
8135         case BTRFS_CHUNK_ITEM_KEY:
8136                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8137                         goto err;
8138                 break;
8139         /* valid in csum and log tree */
8140         case BTRFS_CSUM_TREE_OBJECTID:
8141                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8142                       is_fstree(rootid)))
8143                         goto err;
8144                 break;
8145         case BTRFS_EXTENT_ITEM_KEY:
8146         case BTRFS_METADATA_ITEM_KEY:
8147         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8148                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8149                         goto err;
8150                 break;
8151         case BTRFS_ROOT_ITEM_KEY:
8152                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8153                         goto err;
8154                 break;
8155         case BTRFS_DEV_EXTENT_KEY:
8156                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8157                         goto err;
8158                 break;
8159         }
8160         return 0;
8161 err:
8162         report_mismatch_key_root(key_type, rootid);
8163         return -EINVAL;
8164 }
8165
8166 static int run_next_block(struct btrfs_root *root,
8167                           struct block_info *bits,
8168                           int bits_nr,
8169                           u64 *last,
8170                           struct cache_tree *pending,
8171                           struct cache_tree *seen,
8172                           struct cache_tree *reada,
8173                           struct cache_tree *nodes,
8174                           struct cache_tree *extent_cache,
8175                           struct cache_tree *chunk_cache,
8176                           struct rb_root *dev_cache,
8177                           struct block_group_tree *block_group_cache,
8178                           struct device_extent_tree *dev_extent_cache,
8179                           struct root_item_record *ri)
8180 {
8181         struct btrfs_fs_info *fs_info = root->fs_info;
8182         struct extent_buffer *buf;
8183         struct extent_record *rec = NULL;
8184         u64 bytenr;
8185         u32 size;
8186         u64 parent;
8187         u64 owner;
8188         u64 flags;
8189         u64 ptr;
8190         u64 gen = 0;
8191         int ret = 0;
8192         int i;
8193         int nritems;
8194         struct btrfs_key key;
8195         struct cache_extent *cache;
8196         int reada_bits;
8197
8198         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8199                                     bits_nr, &reada_bits);
8200         if (nritems == 0)
8201                 return 1;
8202
8203         if (!reada_bits) {
8204                 for(i = 0; i < nritems; i++) {
8205                         ret = add_cache_extent(reada, bits[i].start,
8206                                                bits[i].size);
8207                         if (ret == -EEXIST)
8208                                 continue;
8209
8210                         /* fixme, get the parent transid */
8211                         readahead_tree_block(fs_info, bits[i].start, 0);
8212                 }
8213         }
8214         *last = bits[0].start;
8215         bytenr = bits[0].start;
8216         size = bits[0].size;
8217
8218         cache = lookup_cache_extent(pending, bytenr, size);
8219         if (cache) {
8220                 remove_cache_extent(pending, cache);
8221                 free(cache);
8222         }
8223         cache = lookup_cache_extent(reada, bytenr, size);
8224         if (cache) {
8225                 remove_cache_extent(reada, cache);
8226                 free(cache);
8227         }
8228         cache = lookup_cache_extent(nodes, bytenr, size);
8229         if (cache) {
8230                 remove_cache_extent(nodes, cache);
8231                 free(cache);
8232         }
8233         cache = lookup_cache_extent(extent_cache, bytenr, size);
8234         if (cache) {
8235                 rec = container_of(cache, struct extent_record, cache);
8236                 gen = rec->parent_generation;
8237         }
8238
8239         /* fixme, get the real parent transid */
8240         buf = read_tree_block(root->fs_info, bytenr, gen);
8241         if (!extent_buffer_uptodate(buf)) {
8242                 record_bad_block_io(root->fs_info,
8243                                     extent_cache, bytenr, size);
8244                 goto out;
8245         }
8246
8247         nritems = btrfs_header_nritems(buf);
8248
8249         flags = 0;
8250         if (!init_extent_tree) {
8251                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8252                                        btrfs_header_level(buf), 1, NULL,
8253                                        &flags);
8254                 if (ret < 0) {
8255                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8256                         if (ret < 0) {
8257                                 fprintf(stderr, "Couldn't calc extent flags\n");
8258                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8259                         }
8260                 }
8261         } else {
8262                 flags = 0;
8263                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8264                 if (ret < 0) {
8265                         fprintf(stderr, "Couldn't calc extent flags\n");
8266                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8267                 }
8268         }
8269
8270         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8271                 if (ri != NULL &&
8272                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8273                     ri->objectid == btrfs_header_owner(buf)) {
8274                         /*
8275                          * Ok we got to this block from it's original owner and
8276                          * we have FULL_BACKREF set.  Relocation can leave
8277                          * converted blocks over so this is altogether possible,
8278                          * however it's not possible if the generation > the
8279                          * last snapshot, so check for this case.
8280                          */
8281                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8282                             btrfs_header_generation(buf) > ri->last_snapshot) {
8283                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8284                                 rec->bad_full_backref = 1;
8285                         }
8286                 }
8287         } else {
8288                 if (ri != NULL &&
8289                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8290                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8291                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8292                         rec->bad_full_backref = 1;
8293                 }
8294         }
8295
8296         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8297                 rec->flag_block_full_backref = 1;
8298                 parent = bytenr;
8299                 owner = 0;
8300         } else {
8301                 rec->flag_block_full_backref = 0;
8302                 parent = 0;
8303                 owner = btrfs_header_owner(buf);
8304         }
8305
8306         ret = check_block(root, extent_cache, buf, flags);
8307         if (ret)
8308                 goto out;
8309
8310         if (btrfs_is_leaf(buf)) {
8311                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8312                 for (i = 0; i < nritems; i++) {
8313                         struct btrfs_file_extent_item *fi;
8314                         btrfs_item_key_to_cpu(buf, &key, i);
8315                         /*
8316                          * Check key type against the leaf owner.
8317                          * Could filter quite a lot of early error if
8318                          * owner is correct
8319                          */
8320                         if (check_type_with_root(btrfs_header_owner(buf),
8321                                                  key.type)) {
8322                                 fprintf(stderr, "ignoring invalid key\n");
8323                                 continue;
8324                         }
8325                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8326                                 process_extent_item(root, extent_cache, buf,
8327                                                     i);
8328                                 continue;
8329                         }
8330                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8331                                 process_extent_item(root, extent_cache, buf,
8332                                                     i);
8333                                 continue;
8334                         }
8335                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8336                                 total_csum_bytes +=
8337                                         btrfs_item_size_nr(buf, i);
8338                                 continue;
8339                         }
8340                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8341                                 process_chunk_item(chunk_cache, &key, buf, i);
8342                                 continue;
8343                         }
8344                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8345                                 process_device_item(dev_cache, &key, buf, i);
8346                                 continue;
8347                         }
8348                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8349                                 process_block_group_item(block_group_cache,
8350                                         &key, buf, i);
8351                                 continue;
8352                         }
8353                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8354                                 process_device_extent_item(dev_extent_cache,
8355                                         &key, buf, i);
8356                                 continue;
8357
8358                         }
8359                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8360 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8361                                 process_extent_ref_v0(extent_cache, buf, i);
8362 #else
8363                                 BUG();
8364 #endif
8365                                 continue;
8366                         }
8367
8368                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8369                                 ret = add_tree_backref(extent_cache,
8370                                                 key.objectid, 0, key.offset, 0);
8371                                 if (ret < 0)
8372                                         error(
8373                                 "add_tree_backref failed (leaf tree block): %s",
8374                                               strerror(-ret));
8375                                 continue;
8376                         }
8377                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8378                                 ret = add_tree_backref(extent_cache,
8379                                                 key.objectid, key.offset, 0, 0);
8380                                 if (ret < 0)
8381                                         error(
8382                                 "add_tree_backref failed (leaf shared block): %s",
8383                                               strerror(-ret));
8384                                 continue;
8385                         }
8386                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8387                                 struct btrfs_extent_data_ref *ref;
8388                                 ref = btrfs_item_ptr(buf, i,
8389                                                 struct btrfs_extent_data_ref);
8390                                 add_data_backref(extent_cache,
8391                                         key.objectid, 0,
8392                                         btrfs_extent_data_ref_root(buf, ref),
8393                                         btrfs_extent_data_ref_objectid(buf,
8394                                                                        ref),
8395                                         btrfs_extent_data_ref_offset(buf, ref),
8396                                         btrfs_extent_data_ref_count(buf, ref),
8397                                         0, root->fs_info->sectorsize);
8398                                 continue;
8399                         }
8400                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8401                                 struct btrfs_shared_data_ref *ref;
8402                                 ref = btrfs_item_ptr(buf, i,
8403                                                 struct btrfs_shared_data_ref);
8404                                 add_data_backref(extent_cache,
8405                                         key.objectid, key.offset, 0, 0, 0,
8406                                         btrfs_shared_data_ref_count(buf, ref),
8407                                         0, root->fs_info->sectorsize);
8408                                 continue;
8409                         }
8410                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8411                                 struct bad_item *bad;
8412
8413                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8414                                         continue;
8415                                 if (!owner)
8416                                         continue;
8417                                 bad = malloc(sizeof(struct bad_item));
8418                                 if (!bad)
8419                                         continue;
8420                                 INIT_LIST_HEAD(&bad->list);
8421                                 memcpy(&bad->key, &key,
8422                                        sizeof(struct btrfs_key));
8423                                 bad->root_id = owner;
8424                                 list_add_tail(&bad->list, &delete_items);
8425                                 continue;
8426                         }
8427                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8428                                 continue;
8429                         fi = btrfs_item_ptr(buf, i,
8430                                             struct btrfs_file_extent_item);
8431                         if (btrfs_file_extent_type(buf, fi) ==
8432                             BTRFS_FILE_EXTENT_INLINE)
8433                                 continue;
8434                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8435                                 continue;
8436
8437                         data_bytes_allocated +=
8438                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8439                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8440                                 abort();
8441                         }
8442                         data_bytes_referenced +=
8443                                 btrfs_file_extent_num_bytes(buf, fi);
8444                         add_data_backref(extent_cache,
8445                                 btrfs_file_extent_disk_bytenr(buf, fi),
8446                                 parent, owner, key.objectid, key.offset -
8447                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8448                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8449                 }
8450         } else {
8451                 int level;
8452                 struct btrfs_key first_key;
8453
8454                 first_key.objectid = 0;
8455
8456                 if (nritems > 0)
8457                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8458                 level = btrfs_header_level(buf);
8459                 for (i = 0; i < nritems; i++) {
8460                         struct extent_record tmpl;
8461
8462                         ptr = btrfs_node_blockptr(buf, i);
8463                         size = root->fs_info->nodesize;
8464                         btrfs_node_key_to_cpu(buf, &key, i);
8465                         if (ri != NULL) {
8466                                 if ((level == ri->drop_level)
8467                                     && is_dropped_key(&key, &ri->drop_key)) {
8468                                         continue;
8469                                 }
8470                         }
8471
8472                         memset(&tmpl, 0, sizeof(tmpl));
8473                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8474                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8475                         tmpl.start = ptr;
8476                         tmpl.nr = size;
8477                         tmpl.refs = 1;
8478                         tmpl.metadata = 1;
8479                         tmpl.max_size = size;
8480                         ret = add_extent_rec(extent_cache, &tmpl);
8481                         if (ret < 0)
8482                                 goto out;
8483
8484                         ret = add_tree_backref(extent_cache, ptr, parent,
8485                                         owner, 1);
8486                         if (ret < 0) {
8487                                 error(
8488                                 "add_tree_backref failed (non-leaf block): %s",
8489                                       strerror(-ret));
8490                                 continue;
8491                         }
8492
8493                         if (level > 1) {
8494                                 add_pending(nodes, seen, ptr, size);
8495                         } else {
8496                                 add_pending(pending, seen, ptr, size);
8497                         }
8498                 }
8499                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8500                                       nritems) * sizeof(struct btrfs_key_ptr);
8501         }
8502         total_btree_bytes += buf->len;
8503         if (fs_root_objectid(btrfs_header_owner(buf)))
8504                 total_fs_tree_bytes += buf->len;
8505         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8506                 total_extent_tree_bytes += buf->len;
8507 out:
8508         free_extent_buffer(buf);
8509         return ret;
8510 }
8511
8512 static int add_root_to_pending(struct extent_buffer *buf,
8513                                struct cache_tree *extent_cache,
8514                                struct cache_tree *pending,
8515                                struct cache_tree *seen,
8516                                struct cache_tree *nodes,
8517                                u64 objectid)
8518 {
8519         struct extent_record tmpl;
8520         int ret;
8521
8522         if (btrfs_header_level(buf) > 0)
8523                 add_pending(nodes, seen, buf->start, buf->len);
8524         else
8525                 add_pending(pending, seen, buf->start, buf->len);
8526
8527         memset(&tmpl, 0, sizeof(tmpl));
8528         tmpl.start = buf->start;
8529         tmpl.nr = buf->len;
8530         tmpl.is_root = 1;
8531         tmpl.refs = 1;
8532         tmpl.metadata = 1;
8533         tmpl.max_size = buf->len;
8534         add_extent_rec(extent_cache, &tmpl);
8535
8536         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8537             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8538                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8539                                 0, 1);
8540         else
8541                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8542                                 1);
8543         return ret;
8544 }
8545
8546 /* as we fix the tree, we might be deleting blocks that
8547  * we're tracking for repair.  This hook makes sure we
8548  * remove any backrefs for blocks as we are fixing them.
8549  */
8550 static int free_extent_hook(struct btrfs_trans_handle *trans,
8551                             struct btrfs_root *root,
8552                             u64 bytenr, u64 num_bytes, u64 parent,
8553                             u64 root_objectid, u64 owner, u64 offset,
8554                             int refs_to_drop)
8555 {
8556         struct extent_record *rec;
8557         struct cache_extent *cache;
8558         int is_data;
8559         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8560
8561         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8562         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8563         if (!cache)
8564                 return 0;
8565
8566         rec = container_of(cache, struct extent_record, cache);
8567         if (is_data) {
8568                 struct data_backref *back;
8569                 back = find_data_backref(rec, parent, root_objectid, owner,
8570                                          offset, 1, bytenr, num_bytes);
8571                 if (!back)
8572                         goto out;
8573                 if (back->node.found_ref) {
8574                         back->found_ref -= refs_to_drop;
8575                         if (rec->refs)
8576                                 rec->refs -= refs_to_drop;
8577                 }
8578                 if (back->node.found_extent_tree) {
8579                         back->num_refs -= refs_to_drop;
8580                         if (rec->extent_item_refs)
8581                                 rec->extent_item_refs -= refs_to_drop;
8582                 }
8583                 if (back->found_ref == 0)
8584                         back->node.found_ref = 0;
8585                 if (back->num_refs == 0)
8586                         back->node.found_extent_tree = 0;
8587
8588                 if (!back->node.found_extent_tree && back->node.found_ref) {
8589                         rb_erase(&back->node.node, &rec->backref_tree);
8590                         free(back);
8591                 }
8592         } else {
8593                 struct tree_backref *back;
8594                 back = find_tree_backref(rec, parent, root_objectid);
8595                 if (!back)
8596                         goto out;
8597                 if (back->node.found_ref) {
8598                         if (rec->refs)
8599                                 rec->refs--;
8600                         back->node.found_ref = 0;
8601                 }
8602                 if (back->node.found_extent_tree) {
8603                         if (rec->extent_item_refs)
8604                                 rec->extent_item_refs--;
8605                         back->node.found_extent_tree = 0;
8606                 }
8607                 if (!back->node.found_extent_tree && back->node.found_ref) {
8608                         rb_erase(&back->node.node, &rec->backref_tree);
8609                         free(back);
8610                 }
8611         }
8612         maybe_free_extent_rec(extent_cache, rec);
8613 out:
8614         return 0;
8615 }
8616
8617 static int delete_extent_records(struct btrfs_trans_handle *trans,
8618                                  struct btrfs_root *root,
8619                                  struct btrfs_path *path,
8620                                  u64 bytenr)
8621 {
8622         struct btrfs_key key;
8623         struct btrfs_key found_key;
8624         struct extent_buffer *leaf;
8625         int ret;
8626         int slot;
8627
8628
8629         key.objectid = bytenr;
8630         key.type = (u8)-1;
8631         key.offset = (u64)-1;
8632
8633         while(1) {
8634                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8635                                         &key, path, 0, 1);
8636                 if (ret < 0)
8637                         break;
8638
8639                 if (ret > 0) {
8640                         ret = 0;
8641                         if (path->slots[0] == 0)
8642                                 break;
8643                         path->slots[0]--;
8644                 }
8645                 ret = 0;
8646
8647                 leaf = path->nodes[0];
8648                 slot = path->slots[0];
8649
8650                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8651                 if (found_key.objectid != bytenr)
8652                         break;
8653
8654                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8655                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8656                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8657                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8658                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8659                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8660                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8661                         btrfs_release_path(path);
8662                         if (found_key.type == 0) {
8663                                 if (found_key.offset == 0)
8664                                         break;
8665                                 key.offset = found_key.offset - 1;
8666                                 key.type = found_key.type;
8667                         }
8668                         key.type = found_key.type - 1;
8669                         key.offset = (u64)-1;
8670                         continue;
8671                 }
8672
8673                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8674                         found_key.objectid, found_key.type, found_key.offset);
8675
8676                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8677                 if (ret)
8678                         break;
8679                 btrfs_release_path(path);
8680
8681                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8682                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8683                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8684                                 found_key.offset : root->fs_info->nodesize;
8685
8686                         ret = btrfs_update_block_group(trans, root, bytenr,
8687                                                        bytes, 0, 0);
8688                         if (ret)
8689                                 break;
8690                 }
8691         }
8692
8693         btrfs_release_path(path);
8694         return ret;
8695 }
8696
8697 /*
8698  * for a single backref, this will allocate a new extent
8699  * and add the backref to it.
8700  */
8701 static int record_extent(struct btrfs_trans_handle *trans,
8702                          struct btrfs_fs_info *info,
8703                          struct btrfs_path *path,
8704                          struct extent_record *rec,
8705                          struct extent_backref *back,
8706                          int allocated, u64 flags)
8707 {
8708         int ret = 0;
8709         struct btrfs_root *extent_root = info->extent_root;
8710         struct extent_buffer *leaf;
8711         struct btrfs_key ins_key;
8712         struct btrfs_extent_item *ei;
8713         struct data_backref *dback;
8714         struct btrfs_tree_block_info *bi;
8715
8716         if (!back->is_data)
8717                 rec->max_size = max_t(u64, rec->max_size,
8718                                     info->nodesize);
8719
8720         if (!allocated) {
8721                 u32 item_size = sizeof(*ei);
8722
8723                 if (!back->is_data)
8724                         item_size += sizeof(*bi);
8725
8726                 ins_key.objectid = rec->start;
8727                 ins_key.offset = rec->max_size;
8728                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8729
8730                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8731                                         &ins_key, item_size);
8732                 if (ret)
8733                         goto fail;
8734
8735                 leaf = path->nodes[0];
8736                 ei = btrfs_item_ptr(leaf, path->slots[0],
8737                                     struct btrfs_extent_item);
8738
8739                 btrfs_set_extent_refs(leaf, ei, 0);
8740                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8741
8742                 if (back->is_data) {
8743                         btrfs_set_extent_flags(leaf, ei,
8744                                                BTRFS_EXTENT_FLAG_DATA);
8745                 } else {
8746                         struct btrfs_disk_key copy_key;;
8747
8748                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8749                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8750                                              sizeof(*bi));
8751
8752                         btrfs_set_disk_key_objectid(&copy_key,
8753                                                     rec->info_objectid);
8754                         btrfs_set_disk_key_type(&copy_key, 0);
8755                         btrfs_set_disk_key_offset(&copy_key, 0);
8756
8757                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8758                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8759
8760                         btrfs_set_extent_flags(leaf, ei,
8761                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8762                 }
8763
8764                 btrfs_mark_buffer_dirty(leaf);
8765                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8766                                                rec->max_size, 1, 0);
8767                 if (ret)
8768                         goto fail;
8769                 btrfs_release_path(path);
8770         }
8771
8772         if (back->is_data) {
8773                 u64 parent;
8774                 int i;
8775
8776                 dback = to_data_backref(back);
8777                 if (back->full_backref)
8778                         parent = dback->parent;
8779                 else
8780                         parent = 0;
8781
8782                 for (i = 0; i < dback->found_ref; i++) {
8783                         /* if parent != 0, we're doing a full backref
8784                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8785                          * just makes the backref allocator create a data
8786                          * backref
8787                          */
8788                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8789                                                    rec->start, rec->max_size,
8790                                                    parent,
8791                                                    dback->root,
8792                                                    parent ?
8793                                                    BTRFS_FIRST_FREE_OBJECTID :
8794                                                    dback->owner,
8795                                                    dback->offset);
8796                         if (ret)
8797                                 break;
8798                 }
8799                 fprintf(stderr, "adding new data backref"
8800                                 " on %llu %s %llu owner %llu"
8801                                 " offset %llu found %d\n",
8802                                 (unsigned long long)rec->start,
8803                                 back->full_backref ?
8804                                 "parent" : "root",
8805                                 back->full_backref ?
8806                                 (unsigned long long)parent :
8807                                 (unsigned long long)dback->root,
8808                                 (unsigned long long)dback->owner,
8809                                 (unsigned long long)dback->offset,
8810                                 dback->found_ref);
8811         } else {
8812                 u64 parent;
8813                 struct tree_backref *tback;
8814
8815                 tback = to_tree_backref(back);
8816                 if (back->full_backref)
8817                         parent = tback->parent;
8818                 else
8819                         parent = 0;
8820
8821                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8822                                            rec->start, rec->max_size,
8823                                            parent, tback->root, 0, 0);
8824                 fprintf(stderr, "adding new tree backref on "
8825                         "start %llu len %llu parent %llu root %llu\n",
8826                         rec->start, rec->max_size, parent, tback->root);
8827         }
8828 fail:
8829         btrfs_release_path(path);
8830         return ret;
8831 }
8832
8833 static struct extent_entry *find_entry(struct list_head *entries,
8834                                        u64 bytenr, u64 bytes)
8835 {
8836         struct extent_entry *entry = NULL;
8837
8838         list_for_each_entry(entry, entries, list) {
8839                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8840                         return entry;
8841         }
8842
8843         return NULL;
8844 }
8845
8846 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8847 {
8848         struct extent_entry *entry, *best = NULL, *prev = NULL;
8849
8850         list_for_each_entry(entry, entries, list) {
8851                 /*
8852                  * If there are as many broken entries as entries then we know
8853                  * not to trust this particular entry.
8854                  */
8855                 if (entry->broken == entry->count)
8856                         continue;
8857
8858                 /*
8859                  * Special case, when there are only two entries and 'best' is
8860                  * the first one
8861                  */
8862                 if (!prev) {
8863                         best = entry;
8864                         prev = entry;
8865                         continue;
8866                 }
8867
8868                 /*
8869                  * If our current entry == best then we can't be sure our best
8870                  * is really the best, so we need to keep searching.
8871                  */
8872                 if (best && best->count == entry->count) {
8873                         prev = entry;
8874                         best = NULL;
8875                         continue;
8876                 }
8877
8878                 /* Prev == entry, not good enough, have to keep searching */
8879                 if (!prev->broken && prev->count == entry->count)
8880                         continue;
8881
8882                 if (!best)
8883                         best = (prev->count > entry->count) ? prev : entry;
8884                 else if (best->count < entry->count)
8885                         best = entry;
8886                 prev = entry;
8887         }
8888
8889         return best;
8890 }
8891
8892 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8893                       struct data_backref *dback, struct extent_entry *entry)
8894 {
8895         struct btrfs_trans_handle *trans;
8896         struct btrfs_root *root;
8897         struct btrfs_file_extent_item *fi;
8898         struct extent_buffer *leaf;
8899         struct btrfs_key key;
8900         u64 bytenr, bytes;
8901         int ret, err;
8902
8903         key.objectid = dback->root;
8904         key.type = BTRFS_ROOT_ITEM_KEY;
8905         key.offset = (u64)-1;
8906         root = btrfs_read_fs_root(info, &key);
8907         if (IS_ERR(root)) {
8908                 fprintf(stderr, "Couldn't find root for our ref\n");
8909                 return -EINVAL;
8910         }
8911
8912         /*
8913          * The backref points to the original offset of the extent if it was
8914          * split, so we need to search down to the offset we have and then walk
8915          * forward until we find the backref we're looking for.
8916          */
8917         key.objectid = dback->owner;
8918         key.type = BTRFS_EXTENT_DATA_KEY;
8919         key.offset = dback->offset;
8920         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8921         if (ret < 0) {
8922                 fprintf(stderr, "Error looking up ref %d\n", ret);
8923                 return ret;
8924         }
8925
8926         while (1) {
8927                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8928                         ret = btrfs_next_leaf(root, path);
8929                         if (ret) {
8930                                 fprintf(stderr, "Couldn't find our ref, next\n");
8931                                 return -EINVAL;
8932                         }
8933                 }
8934                 leaf = path->nodes[0];
8935                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8936                 if (key.objectid != dback->owner ||
8937                     key.type != BTRFS_EXTENT_DATA_KEY) {
8938                         fprintf(stderr, "Couldn't find our ref, search\n");
8939                         return -EINVAL;
8940                 }
8941                 fi = btrfs_item_ptr(leaf, path->slots[0],
8942                                     struct btrfs_file_extent_item);
8943                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8944                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8945
8946                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8947                         break;
8948                 path->slots[0]++;
8949         }
8950
8951         btrfs_release_path(path);
8952
8953         trans = btrfs_start_transaction(root, 1);
8954         if (IS_ERR(trans))
8955                 return PTR_ERR(trans);
8956
8957         /*
8958          * Ok we have the key of the file extent we want to fix, now we can cow
8959          * down to the thing and fix it.
8960          */
8961         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8962         if (ret < 0) {
8963                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8964                         key.objectid, key.type, key.offset, ret);
8965                 goto out;
8966         }
8967         if (ret > 0) {
8968                 fprintf(stderr, "Well that's odd, we just found this key "
8969                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8970                         key.offset);
8971                 ret = -EINVAL;
8972                 goto out;
8973         }
8974         leaf = path->nodes[0];
8975         fi = btrfs_item_ptr(leaf, path->slots[0],
8976                             struct btrfs_file_extent_item);
8977
8978         if (btrfs_file_extent_compression(leaf, fi) &&
8979             dback->disk_bytenr != entry->bytenr) {
8980                 fprintf(stderr, "Ref doesn't match the record start and is "
8981                         "compressed, please take a btrfs-image of this file "
8982                         "system and send it to a btrfs developer so they can "
8983                         "complete this functionality for bytenr %Lu\n",
8984                         dback->disk_bytenr);
8985                 ret = -EINVAL;
8986                 goto out;
8987         }
8988
8989         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8990                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8991         } else if (dback->disk_bytenr > entry->bytenr) {
8992                 u64 off_diff, offset;
8993
8994                 off_diff = dback->disk_bytenr - entry->bytenr;
8995                 offset = btrfs_file_extent_offset(leaf, fi);
8996                 if (dback->disk_bytenr + offset +
8997                     btrfs_file_extent_num_bytes(leaf, fi) >
8998                     entry->bytenr + entry->bytes) {
8999                         fprintf(stderr, "Ref is past the entry end, please "
9000                                 "take a btrfs-image of this file system and "
9001                                 "send it to a btrfs developer, ref %Lu\n",
9002                                 dback->disk_bytenr);
9003                         ret = -EINVAL;
9004                         goto out;
9005                 }
9006                 offset += off_diff;
9007                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9008                 btrfs_set_file_extent_offset(leaf, fi, offset);
9009         } else if (dback->disk_bytenr < entry->bytenr) {
9010                 u64 offset;
9011
9012                 offset = btrfs_file_extent_offset(leaf, fi);
9013                 if (dback->disk_bytenr + offset < entry->bytenr) {
9014                         fprintf(stderr, "Ref is before the entry start, please"
9015                                 " take a btrfs-image of this file system and "
9016                                 "send it to a btrfs developer, ref %Lu\n",
9017                                 dback->disk_bytenr);
9018                         ret = -EINVAL;
9019                         goto out;
9020                 }
9021
9022                 offset += dback->disk_bytenr;
9023                 offset -= entry->bytenr;
9024                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9025                 btrfs_set_file_extent_offset(leaf, fi, offset);
9026         }
9027
9028         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9029
9030         /*
9031          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9032          * only do this if we aren't using compression, otherwise it's a
9033          * trickier case.
9034          */
9035         if (!btrfs_file_extent_compression(leaf, fi))
9036                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9037         else
9038                 printf("ram bytes may be wrong?\n");
9039         btrfs_mark_buffer_dirty(leaf);
9040 out:
9041         err = btrfs_commit_transaction(trans, root);
9042         btrfs_release_path(path);
9043         return ret ? ret : err;
9044 }
9045
9046 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9047                            struct extent_record *rec)
9048 {
9049         struct extent_backref *back, *tmp;
9050         struct data_backref *dback;
9051         struct extent_entry *entry, *best = NULL;
9052         LIST_HEAD(entries);
9053         int nr_entries = 0;
9054         int broken_entries = 0;
9055         int ret = 0;
9056         short mismatch = 0;
9057
9058         /*
9059          * Metadata is easy and the backrefs should always agree on bytenr and
9060          * size, if not we've got bigger issues.
9061          */
9062         if (rec->metadata)
9063                 return 0;
9064
9065         rbtree_postorder_for_each_entry_safe(back, tmp,
9066                                              &rec->backref_tree, node) {
9067                 if (back->full_backref || !back->is_data)
9068                         continue;
9069
9070                 dback = to_data_backref(back);
9071
9072                 /*
9073                  * We only pay attention to backrefs that we found a real
9074                  * backref for.
9075                  */
9076                 if (dback->found_ref == 0)
9077                         continue;
9078
9079                 /*
9080                  * For now we only catch when the bytes don't match, not the
9081                  * bytenr.  We can easily do this at the same time, but I want
9082                  * to have a fs image to test on before we just add repair
9083                  * functionality willy-nilly so we know we won't screw up the
9084                  * repair.
9085                  */
9086
9087                 entry = find_entry(&entries, dback->disk_bytenr,
9088                                    dback->bytes);
9089                 if (!entry) {
9090                         entry = malloc(sizeof(struct extent_entry));
9091                         if (!entry) {
9092                                 ret = -ENOMEM;
9093                                 goto out;
9094                         }
9095                         memset(entry, 0, sizeof(*entry));
9096                         entry->bytenr = dback->disk_bytenr;
9097                         entry->bytes = dback->bytes;
9098                         list_add_tail(&entry->list, &entries);
9099                         nr_entries++;
9100                 }
9101
9102                 /*
9103                  * If we only have on entry we may think the entries agree when
9104                  * in reality they don't so we have to do some extra checking.
9105                  */
9106                 if (dback->disk_bytenr != rec->start ||
9107                     dback->bytes != rec->nr || back->broken)
9108                         mismatch = 1;
9109
9110                 if (back->broken) {
9111                         entry->broken++;
9112                         broken_entries++;
9113                 }
9114
9115                 entry->count++;
9116         }
9117
9118         /* Yay all the backrefs agree, carry on good sir */
9119         if (nr_entries <= 1 && !mismatch)
9120                 goto out;
9121
9122         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9123                 "%Lu\n", rec->start);
9124
9125         /*
9126          * First we want to see if the backrefs can agree amongst themselves who
9127          * is right, so figure out which one of the entries has the highest
9128          * count.
9129          */
9130         best = find_most_right_entry(&entries);
9131
9132         /*
9133          * Ok so we may have an even split between what the backrefs think, so
9134          * this is where we use the extent ref to see what it thinks.
9135          */
9136         if (!best) {
9137                 entry = find_entry(&entries, rec->start, rec->nr);
9138                 if (!entry && (!broken_entries || !rec->found_rec)) {
9139                         fprintf(stderr, "Backrefs don't agree with each other "
9140                                 "and extent record doesn't agree with anybody,"
9141                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9142                                 rec->start, rec->nr);
9143                         ret = -EINVAL;
9144                         goto out;
9145                 } else if (!entry) {
9146                         /*
9147                          * Ok our backrefs were broken, we'll assume this is the
9148                          * correct value and add an entry for this range.
9149                          */
9150                         entry = malloc(sizeof(struct extent_entry));
9151                         if (!entry) {
9152                                 ret = -ENOMEM;
9153                                 goto out;
9154                         }
9155                         memset(entry, 0, sizeof(*entry));
9156                         entry->bytenr = rec->start;
9157                         entry->bytes = rec->nr;
9158                         list_add_tail(&entry->list, &entries);
9159                         nr_entries++;
9160                 }
9161                 entry->count++;
9162                 best = find_most_right_entry(&entries);
9163                 if (!best) {
9164                         fprintf(stderr, "Backrefs and extent record evenly "
9165                                 "split on who is right, this is going to "
9166                                 "require user input to fix bytenr %Lu bytes "
9167                                 "%Lu\n", rec->start, rec->nr);
9168                         ret = -EINVAL;
9169                         goto out;
9170                 }
9171         }
9172
9173         /*
9174          * I don't think this can happen currently as we'll abort() if we catch
9175          * this case higher up, but in case somebody removes that we still can't
9176          * deal with it properly here yet, so just bail out of that's the case.
9177          */
9178         if (best->bytenr != rec->start) {
9179                 fprintf(stderr, "Extent start and backref starts don't match, "
9180                         "please use btrfs-image on this file system and send "
9181                         "it to a btrfs developer so they can make fsck fix "
9182                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9183                         rec->start, rec->nr);
9184                 ret = -EINVAL;
9185                 goto out;
9186         }
9187
9188         /*
9189          * Ok great we all agreed on an extent record, let's go find the real
9190          * references and fix up the ones that don't match.
9191          */
9192         rbtree_postorder_for_each_entry_safe(back, tmp,
9193                                              &rec->backref_tree, node) {
9194                 if (back->full_backref || !back->is_data)
9195                         continue;
9196
9197                 dback = to_data_backref(back);
9198
9199                 /*
9200                  * Still ignoring backrefs that don't have a real ref attached
9201                  * to them.
9202                  */
9203                 if (dback->found_ref == 0)
9204                         continue;
9205
9206                 if (dback->bytes == best->bytes &&
9207                     dback->disk_bytenr == best->bytenr)
9208                         continue;
9209
9210                 ret = repair_ref(info, path, dback, best);
9211                 if (ret)
9212                         goto out;
9213         }
9214
9215         /*
9216          * Ok we messed with the actual refs, which means we need to drop our
9217          * entire cache and go back and rescan.  I know this is a huge pain and
9218          * adds a lot of extra work, but it's the only way to be safe.  Once all
9219          * the backrefs agree we may not need to do anything to the extent
9220          * record itself.
9221          */
9222         ret = -EAGAIN;
9223 out:
9224         while (!list_empty(&entries)) {
9225                 entry = list_entry(entries.next, struct extent_entry, list);
9226                 list_del_init(&entry->list);
9227                 free(entry);
9228         }
9229         return ret;
9230 }
9231
9232 static int process_duplicates(struct cache_tree *extent_cache,
9233                               struct extent_record *rec)
9234 {
9235         struct extent_record *good, *tmp;
9236         struct cache_extent *cache;
9237         int ret;
9238
9239         /*
9240          * If we found a extent record for this extent then return, or if we
9241          * have more than one duplicate we are likely going to need to delete
9242          * something.
9243          */
9244         if (rec->found_rec || rec->num_duplicates > 1)
9245                 return 0;
9246
9247         /* Shouldn't happen but just in case */
9248         BUG_ON(!rec->num_duplicates);
9249
9250         /*
9251          * So this happens if we end up with a backref that doesn't match the
9252          * actual extent entry.  So either the backref is bad or the extent
9253          * entry is bad.  Either way we want to have the extent_record actually
9254          * reflect what we found in the extent_tree, so we need to take the
9255          * duplicate out and use that as the extent_record since the only way we
9256          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9257          */
9258         remove_cache_extent(extent_cache, &rec->cache);
9259
9260         good = to_extent_record(rec->dups.next);
9261         list_del_init(&good->list);
9262         INIT_LIST_HEAD(&good->backrefs);
9263         INIT_LIST_HEAD(&good->dups);
9264         good->cache.start = good->start;
9265         good->cache.size = good->nr;
9266         good->content_checked = 0;
9267         good->owner_ref_checked = 0;
9268         good->num_duplicates = 0;
9269         good->refs = rec->refs;
9270         list_splice_init(&rec->backrefs, &good->backrefs);
9271         while (1) {
9272                 cache = lookup_cache_extent(extent_cache, good->start,
9273                                             good->nr);
9274                 if (!cache)
9275                         break;
9276                 tmp = container_of(cache, struct extent_record, cache);
9277
9278                 /*
9279                  * If we find another overlapping extent and it's found_rec is
9280                  * set then it's a duplicate and we need to try and delete
9281                  * something.
9282                  */
9283                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9284                         if (list_empty(&good->list))
9285                                 list_add_tail(&good->list,
9286                                               &duplicate_extents);
9287                         good->num_duplicates += tmp->num_duplicates + 1;
9288                         list_splice_init(&tmp->dups, &good->dups);
9289                         list_del_init(&tmp->list);
9290                         list_add_tail(&tmp->list, &good->dups);
9291                         remove_cache_extent(extent_cache, &tmp->cache);
9292                         continue;
9293                 }
9294
9295                 /*
9296                  * Ok we have another non extent item backed extent rec, so lets
9297                  * just add it to this extent and carry on like we did above.
9298                  */
9299                 good->refs += tmp->refs;
9300                 list_splice_init(&tmp->backrefs, &good->backrefs);
9301                 remove_cache_extent(extent_cache, &tmp->cache);
9302                 free(tmp);
9303         }
9304         ret = insert_cache_extent(extent_cache, &good->cache);
9305         BUG_ON(ret);
9306         free(rec);
9307         return good->num_duplicates ? 0 : 1;
9308 }
9309
9310 static int delete_duplicate_records(struct btrfs_root *root,
9311                                     struct extent_record *rec)
9312 {
9313         struct btrfs_trans_handle *trans;
9314         LIST_HEAD(delete_list);
9315         struct btrfs_path path;
9316         struct extent_record *tmp, *good, *n;
9317         int nr_del = 0;
9318         int ret = 0, err;
9319         struct btrfs_key key;
9320
9321         btrfs_init_path(&path);
9322
9323         good = rec;
9324         /* Find the record that covers all of the duplicates. */
9325         list_for_each_entry(tmp, &rec->dups, list) {
9326                 if (good->start < tmp->start)
9327                         continue;
9328                 if (good->nr > tmp->nr)
9329                         continue;
9330
9331                 if (tmp->start + tmp->nr < good->start + good->nr) {
9332                         fprintf(stderr, "Ok we have overlapping extents that "
9333                                 "aren't completely covered by each other, this "
9334                                 "is going to require more careful thought.  "
9335                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9336                                 tmp->start, tmp->nr, good->start, good->nr);
9337                         abort();
9338                 }
9339                 good = tmp;
9340         }
9341
9342         if (good != rec)
9343                 list_add_tail(&rec->list, &delete_list);
9344
9345         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9346                 if (tmp == good)
9347                         continue;
9348                 list_move_tail(&tmp->list, &delete_list);
9349         }
9350
9351         root = root->fs_info->extent_root;
9352         trans = btrfs_start_transaction(root, 1);
9353         if (IS_ERR(trans)) {
9354                 ret = PTR_ERR(trans);
9355                 goto out;
9356         }
9357
9358         list_for_each_entry(tmp, &delete_list, list) {
9359                 if (tmp->found_rec == 0)
9360                         continue;
9361                 key.objectid = tmp->start;
9362                 key.type = BTRFS_EXTENT_ITEM_KEY;
9363                 key.offset = tmp->nr;
9364
9365                 /* Shouldn't happen but just in case */
9366                 if (tmp->metadata) {
9367                         fprintf(stderr, "Well this shouldn't happen, extent "
9368                                 "record overlaps but is metadata? "
9369                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9370                         abort();
9371                 }
9372
9373                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9374                 if (ret) {
9375                         if (ret > 0)
9376                                 ret = -EINVAL;
9377                         break;
9378                 }
9379                 ret = btrfs_del_item(trans, root, &path);
9380                 if (ret)
9381                         break;
9382                 btrfs_release_path(&path);
9383                 nr_del++;
9384         }
9385         err = btrfs_commit_transaction(trans, root);
9386         if (err && !ret)
9387                 ret = err;
9388 out:
9389         while (!list_empty(&delete_list)) {
9390                 tmp = to_extent_record(delete_list.next);
9391                 list_del_init(&tmp->list);
9392                 if (tmp == rec)
9393                         continue;
9394                 free(tmp);
9395         }
9396
9397         while (!list_empty(&rec->dups)) {
9398                 tmp = to_extent_record(rec->dups.next);
9399                 list_del_init(&tmp->list);
9400                 free(tmp);
9401         }
9402
9403         btrfs_release_path(&path);
9404
9405         if (!ret && !nr_del)
9406                 rec->num_duplicates = 0;
9407
9408         return ret ? ret : nr_del;
9409 }
9410
9411 static int find_possible_backrefs(struct btrfs_fs_info *info,
9412                                   struct btrfs_path *path,
9413                                   struct cache_tree *extent_cache,
9414                                   struct extent_record *rec)
9415 {
9416         struct btrfs_root *root;
9417         struct extent_backref *back, *tmp;
9418         struct data_backref *dback;
9419         struct cache_extent *cache;
9420         struct btrfs_file_extent_item *fi;
9421         struct btrfs_key key;
9422         u64 bytenr, bytes;
9423         int ret;
9424
9425         rbtree_postorder_for_each_entry_safe(back, tmp,
9426                                              &rec->backref_tree, node) {
9427                 /* Don't care about full backrefs (poor unloved backrefs) */
9428                 if (back->full_backref || !back->is_data)
9429                         continue;
9430
9431                 dback = to_data_backref(back);
9432
9433                 /* We found this one, we don't need to do a lookup */
9434                 if (dback->found_ref)
9435                         continue;
9436
9437                 key.objectid = dback->root;
9438                 key.type = BTRFS_ROOT_ITEM_KEY;
9439                 key.offset = (u64)-1;
9440
9441                 root = btrfs_read_fs_root(info, &key);
9442
9443                 /* No root, definitely a bad ref, skip */
9444                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9445                         continue;
9446                 /* Other err, exit */
9447                 if (IS_ERR(root))
9448                         return PTR_ERR(root);
9449
9450                 key.objectid = dback->owner;
9451                 key.type = BTRFS_EXTENT_DATA_KEY;
9452                 key.offset = dback->offset;
9453                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9454                 if (ret) {
9455                         btrfs_release_path(path);
9456                         if (ret < 0)
9457                                 return ret;
9458                         /* Didn't find it, we can carry on */
9459                         ret = 0;
9460                         continue;
9461                 }
9462
9463                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9464                                     struct btrfs_file_extent_item);
9465                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9466                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9467                 btrfs_release_path(path);
9468                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9469                 if (cache) {
9470                         struct extent_record *tmp;
9471                         tmp = container_of(cache, struct extent_record, cache);
9472
9473                         /*
9474                          * If we found an extent record for the bytenr for this
9475                          * particular backref then we can't add it to our
9476                          * current extent record.  We only want to add backrefs
9477                          * that don't have a corresponding extent item in the
9478                          * extent tree since they likely belong to this record
9479                          * and we need to fix it if it doesn't match bytenrs.
9480                          */
9481                         if  (tmp->found_rec)
9482                                 continue;
9483                 }
9484
9485                 dback->found_ref += 1;
9486                 dback->disk_bytenr = bytenr;
9487                 dback->bytes = bytes;
9488
9489                 /*
9490                  * Set this so the verify backref code knows not to trust the
9491                  * values in this backref.
9492                  */
9493                 back->broken = 1;
9494         }
9495
9496         return 0;
9497 }
9498
9499 /*
9500  * Record orphan data ref into corresponding root.
9501  *
9502  * Return 0 if the extent item contains data ref and recorded.
9503  * Return 1 if the extent item contains no useful data ref
9504  *   On that case, it may contains only shared_dataref or metadata backref
9505  *   or the file extent exists(this should be handled by the extent bytenr
9506  *   recovery routine)
9507  * Return <0 if something goes wrong.
9508  */
9509 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9510                                       struct extent_record *rec)
9511 {
9512         struct btrfs_key key;
9513         struct btrfs_root *dest_root;
9514         struct extent_backref *back, *tmp;
9515         struct data_backref *dback;
9516         struct orphan_data_extent *orphan;
9517         struct btrfs_path path;
9518         int recorded_data_ref = 0;
9519         int ret = 0;
9520
9521         if (rec->metadata)
9522                 return 1;
9523         btrfs_init_path(&path);
9524         rbtree_postorder_for_each_entry_safe(back, tmp,
9525                                              &rec->backref_tree, node) {
9526                 if (back->full_backref || !back->is_data ||
9527                     !back->found_extent_tree)
9528                         continue;
9529                 dback = to_data_backref(back);
9530                 if (dback->found_ref)
9531                         continue;
9532                 key.objectid = dback->root;
9533                 key.type = BTRFS_ROOT_ITEM_KEY;
9534                 key.offset = (u64)-1;
9535
9536                 dest_root = btrfs_read_fs_root(fs_info, &key);
9537
9538                 /* For non-exist root we just skip it */
9539                 if (IS_ERR(dest_root) || !dest_root)
9540                         continue;
9541
9542                 key.objectid = dback->owner;
9543                 key.type = BTRFS_EXTENT_DATA_KEY;
9544                 key.offset = dback->offset;
9545
9546                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9547                 btrfs_release_path(&path);
9548                 /*
9549                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9550                  * we need to record it for inode/file extent rebuild.
9551                  * For ret > 0, we record it only for file extent rebuild.
9552                  * For ret == 0, the file extent exists but only bytenr
9553                  * mismatch, let the original bytenr fix routine to handle,
9554                  * don't record it.
9555                  */
9556                 if (ret == 0)
9557                         continue;
9558                 ret = 0;
9559                 orphan = malloc(sizeof(*orphan));
9560                 if (!orphan) {
9561                         ret = -ENOMEM;
9562                         goto out;
9563                 }
9564                 INIT_LIST_HEAD(&orphan->list);
9565                 orphan->root = dback->root;
9566                 orphan->objectid = dback->owner;
9567                 orphan->offset = dback->offset;
9568                 orphan->disk_bytenr = rec->cache.start;
9569                 orphan->disk_len = rec->cache.size;
9570                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9571                 recorded_data_ref = 1;
9572         }
9573 out:
9574         btrfs_release_path(&path);
9575         if (!ret)
9576                 return !recorded_data_ref;
9577         else
9578                 return ret;
9579 }
9580
9581 /*
9582  * when an incorrect extent item is found, this will delete
9583  * all of the existing entries for it and recreate them
9584  * based on what the tree scan found.
9585  */
9586 static int fixup_extent_refs(struct btrfs_fs_info *info,
9587                              struct cache_tree *extent_cache,
9588                              struct extent_record *rec)
9589 {
9590         struct btrfs_trans_handle *trans = NULL;
9591         int ret;
9592         struct btrfs_path path;
9593         struct cache_extent *cache;
9594         struct extent_backref *back, *tmp;
9595         int allocated = 0;
9596         u64 flags = 0;
9597
9598         if (rec->flag_block_full_backref)
9599                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9600
9601         btrfs_init_path(&path);
9602         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9603                 /*
9604                  * Sometimes the backrefs themselves are so broken they don't
9605                  * get attached to any meaningful rec, so first go back and
9606                  * check any of our backrefs that we couldn't find and throw
9607                  * them into the list if we find the backref so that
9608                  * verify_backrefs can figure out what to do.
9609                  */
9610                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9611                 if (ret < 0)
9612                         goto out;
9613         }
9614
9615         /* step one, make sure all of the backrefs agree */
9616         ret = verify_backrefs(info, &path, rec);
9617         if (ret < 0)
9618                 goto out;
9619
9620         trans = btrfs_start_transaction(info->extent_root, 1);
9621         if (IS_ERR(trans)) {
9622                 ret = PTR_ERR(trans);
9623                 goto out;
9624         }
9625
9626         /* step two, delete all the existing records */
9627         ret = delete_extent_records(trans, info->extent_root, &path,
9628                                     rec->start);
9629
9630         if (ret < 0)
9631                 goto out;
9632
9633         /* was this block corrupt?  If so, don't add references to it */
9634         cache = lookup_cache_extent(info->corrupt_blocks,
9635                                     rec->start, rec->max_size);
9636         if (cache) {
9637                 ret = 0;
9638                 goto out;
9639         }
9640
9641         /* step three, recreate all the refs we did find */
9642         rbtree_postorder_for_each_entry_safe(back, tmp,
9643                                              &rec->backref_tree, node) {
9644                 /*
9645                  * if we didn't find any references, don't create a
9646                  * new extent record
9647                  */
9648                 if (!back->found_ref)
9649                         continue;
9650
9651                 rec->bad_full_backref = 0;
9652                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9653                 allocated = 1;
9654
9655                 if (ret)
9656                         goto out;
9657         }
9658 out:
9659         if (trans) {
9660                 int err = btrfs_commit_transaction(trans, info->extent_root);
9661                 if (!ret)
9662                         ret = err;
9663         }
9664
9665         if (!ret)
9666                 fprintf(stderr, "Repaired extent references for %llu\n",
9667                                 (unsigned long long)rec->start);
9668
9669         btrfs_release_path(&path);
9670         return ret;
9671 }
9672
9673 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9674                               struct extent_record *rec)
9675 {
9676         struct btrfs_trans_handle *trans;
9677         struct btrfs_root *root = fs_info->extent_root;
9678         struct btrfs_path path;
9679         struct btrfs_extent_item *ei;
9680         struct btrfs_key key;
9681         u64 flags;
9682         int ret = 0;
9683
9684         key.objectid = rec->start;
9685         if (rec->metadata) {
9686                 key.type = BTRFS_METADATA_ITEM_KEY;
9687                 key.offset = rec->info_level;
9688         } else {
9689                 key.type = BTRFS_EXTENT_ITEM_KEY;
9690                 key.offset = rec->max_size;
9691         }
9692
9693         trans = btrfs_start_transaction(root, 0);
9694         if (IS_ERR(trans))
9695                 return PTR_ERR(trans);
9696
9697         btrfs_init_path(&path);
9698         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9699         if (ret < 0) {
9700                 btrfs_release_path(&path);
9701                 btrfs_commit_transaction(trans, root);
9702                 return ret;
9703         } else if (ret) {
9704                 fprintf(stderr, "Didn't find extent for %llu\n",
9705                         (unsigned long long)rec->start);
9706                 btrfs_release_path(&path);
9707                 btrfs_commit_transaction(trans, root);
9708                 return -ENOENT;
9709         }
9710
9711         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9712                             struct btrfs_extent_item);
9713         flags = btrfs_extent_flags(path.nodes[0], ei);
9714         if (rec->flag_block_full_backref) {
9715                 fprintf(stderr, "setting full backref on %llu\n",
9716                         (unsigned long long)key.objectid);
9717                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9718         } else {
9719                 fprintf(stderr, "clearing full backref on %llu\n",
9720                         (unsigned long long)key.objectid);
9721                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9722         }
9723         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9724         btrfs_mark_buffer_dirty(path.nodes[0]);
9725         btrfs_release_path(&path);
9726         ret = btrfs_commit_transaction(trans, root);
9727         if (!ret)
9728                 fprintf(stderr, "Repaired extent flags for %llu\n",
9729                                 (unsigned long long)rec->start);
9730
9731         return ret;
9732 }
9733
9734 /* right now we only prune from the extent allocation tree */
9735 static int prune_one_block(struct btrfs_trans_handle *trans,
9736                            struct btrfs_fs_info *info,
9737                            struct btrfs_corrupt_block *corrupt)
9738 {
9739         int ret;
9740         struct btrfs_path path;
9741         struct extent_buffer *eb;
9742         u64 found;
9743         int slot;
9744         int nritems;
9745         int level = corrupt->level + 1;
9746
9747         btrfs_init_path(&path);
9748 again:
9749         /* we want to stop at the parent to our busted block */
9750         path.lowest_level = level;
9751
9752         ret = btrfs_search_slot(trans, info->extent_root,
9753                                 &corrupt->key, &path, -1, 1);
9754
9755         if (ret < 0)
9756                 goto out;
9757
9758         eb = path.nodes[level];
9759         if (!eb) {
9760                 ret = -ENOENT;
9761                 goto out;
9762         }
9763
9764         /*
9765          * hopefully the search gave us the block we want to prune,
9766          * lets try that first
9767          */
9768         slot = path.slots[level];
9769         found =  btrfs_node_blockptr(eb, slot);
9770         if (found == corrupt->cache.start)
9771                 goto del_ptr;
9772
9773         nritems = btrfs_header_nritems(eb);
9774
9775         /* the search failed, lets scan this node and hope we find it */
9776         for (slot = 0; slot < nritems; slot++) {
9777                 found =  btrfs_node_blockptr(eb, slot);
9778                 if (found == corrupt->cache.start)
9779                         goto del_ptr;
9780         }
9781         /*
9782          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9783          * to this block
9784          */
9785         if (eb == info->extent_root->node) {
9786                 ret = -ENOENT;
9787                 goto out;
9788         } else {
9789                 level++;
9790                 btrfs_release_path(&path);
9791                 goto again;
9792         }
9793
9794 del_ptr:
9795         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9796         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9797
9798 out:
9799         btrfs_release_path(&path);
9800         return ret;
9801 }
9802
9803 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9804 {
9805         struct btrfs_trans_handle *trans = NULL;
9806         struct cache_extent *cache;
9807         struct btrfs_corrupt_block *corrupt;
9808
9809         while (1) {
9810                 cache = search_cache_extent(info->corrupt_blocks, 0);
9811                 if (!cache)
9812                         break;
9813                 if (!trans) {
9814                         trans = btrfs_start_transaction(info->extent_root, 1);
9815                         if (IS_ERR(trans))
9816                                 return PTR_ERR(trans);
9817                 }
9818                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9819                 prune_one_block(trans, info, corrupt);
9820                 remove_cache_extent(info->corrupt_blocks, cache);
9821         }
9822         if (trans)
9823                 return btrfs_commit_transaction(trans, info->extent_root);
9824         return 0;
9825 }
9826
9827 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9828 {
9829         struct btrfs_block_group_cache *cache;
9830         u64 start, end;
9831         int ret;
9832
9833         while (1) {
9834                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9835                                             &start, &end, EXTENT_DIRTY);
9836                 if (ret)
9837                         break;
9838                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9839         }
9840
9841         start = 0;
9842         while (1) {
9843                 cache = btrfs_lookup_first_block_group(fs_info, start);
9844                 if (!cache)
9845                         break;
9846                 if (cache->cached)
9847                         cache->cached = 0;
9848                 start = cache->key.objectid + cache->key.offset;
9849         }
9850 }
9851
9852 static int check_extent_refs(struct btrfs_root *root,
9853                              struct cache_tree *extent_cache)
9854 {
9855         struct extent_record *rec;
9856         struct cache_extent *cache;
9857         int ret = 0;
9858         int had_dups = 0;
9859
9860         if (repair) {
9861                 /*
9862                  * if we're doing a repair, we have to make sure
9863                  * we don't allocate from the problem extents.
9864                  * In the worst case, this will be all the
9865                  * extents in the FS
9866                  */
9867                 cache = search_cache_extent(extent_cache, 0);
9868                 while(cache) {
9869                         rec = container_of(cache, struct extent_record, cache);
9870                         set_extent_dirty(root->fs_info->excluded_extents,
9871                                          rec->start,
9872                                          rec->start + rec->max_size - 1);
9873                         cache = next_cache_extent(cache);
9874                 }
9875
9876                 /* pin down all the corrupted blocks too */
9877                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9878                 while(cache) {
9879                         set_extent_dirty(root->fs_info->excluded_extents,
9880                                          cache->start,
9881                                          cache->start + cache->size - 1);
9882                         cache = next_cache_extent(cache);
9883                 }
9884                 prune_corrupt_blocks(root->fs_info);
9885                 reset_cached_block_groups(root->fs_info);
9886         }
9887
9888         reset_cached_block_groups(root->fs_info);
9889
9890         /*
9891          * We need to delete any duplicate entries we find first otherwise we
9892          * could mess up the extent tree when we have backrefs that actually
9893          * belong to a different extent item and not the weird duplicate one.
9894          */
9895         while (repair && !list_empty(&duplicate_extents)) {
9896                 rec = to_extent_record(duplicate_extents.next);
9897                 list_del_init(&rec->list);
9898
9899                 /* Sometimes we can find a backref before we find an actual
9900                  * extent, so we need to process it a little bit to see if there
9901                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9902                  * if this is a backref screwup.  If we need to delete stuff
9903                  * process_duplicates() will return 0, otherwise it will return
9904                  * 1 and we
9905                  */
9906                 if (process_duplicates(extent_cache, rec))
9907                         continue;
9908                 ret = delete_duplicate_records(root, rec);
9909                 if (ret < 0)
9910                         return ret;
9911                 /*
9912                  * delete_duplicate_records will return the number of entries
9913                  * deleted, so if it's greater than 0 then we know we actually
9914                  * did something and we need to remove.
9915                  */
9916                 if (ret)
9917                         had_dups = 1;
9918         }
9919
9920         if (had_dups)
9921                 return -EAGAIN;
9922
9923         while(1) {
9924                 int cur_err = 0;
9925                 int fix = 0;
9926
9927                 cache = search_cache_extent(extent_cache, 0);
9928                 if (!cache)
9929                         break;
9930                 rec = container_of(cache, struct extent_record, cache);
9931                 if (rec->num_duplicates) {
9932                         fprintf(stderr, "extent item %llu has multiple extent "
9933                                 "items\n", (unsigned long long)rec->start);
9934                         cur_err = 1;
9935                 }
9936
9937                 if (rec->refs != rec->extent_item_refs) {
9938                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9939                                 (unsigned long long)rec->start,
9940                                 (unsigned long long)rec->nr);
9941                         fprintf(stderr, "extent item %llu, found %llu\n",
9942                                 (unsigned long long)rec->extent_item_refs,
9943                                 (unsigned long long)rec->refs);
9944                         ret = record_orphan_data_extents(root->fs_info, rec);
9945                         if (ret < 0)
9946                                 goto repair_abort;
9947                         fix = ret;
9948                         cur_err = 1;
9949                 }
9950                 if (all_backpointers_checked(rec, 1)) {
9951                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9952                                 (unsigned long long)rec->start,
9953                                 (unsigned long long)rec->nr);
9954                         fix = 1;
9955                         cur_err = 1;
9956                 }
9957                 if (!rec->owner_ref_checked) {
9958                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9959                                 (unsigned long long)rec->start,
9960                                 (unsigned long long)rec->nr);
9961                         fix = 1;
9962                         cur_err = 1;
9963                 }
9964
9965                 if (repair && fix) {
9966                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9967                         if (ret)
9968                                 goto repair_abort;
9969                 }
9970
9971
9972                 if (rec->bad_full_backref) {
9973                         fprintf(stderr, "bad full backref, on [%llu]\n",
9974                                 (unsigned long long)rec->start);
9975                         if (repair) {
9976                                 ret = fixup_extent_flags(root->fs_info, rec);
9977                                 if (ret)
9978                                         goto repair_abort;
9979                                 fix = 1;
9980                         }
9981                         cur_err = 1;
9982                 }
9983                 /*
9984                  * Although it's not a extent ref's problem, we reuse this
9985                  * routine for error reporting.
9986                  * No repair function yet.
9987                  */
9988                 if (rec->crossing_stripes) {
9989                         fprintf(stderr,
9990                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9991                                 rec->start, rec->start + rec->max_size);
9992                         cur_err = 1;
9993                 }
9994
9995                 if (rec->wrong_chunk_type) {
9996                         fprintf(stderr,
9997                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9998                                 rec->start, rec->start + rec->max_size);
9999                         cur_err = 1;
10000                 }
10001
10002                 remove_cache_extent(extent_cache, cache);
10003                 free_all_extent_backrefs(rec);
10004                 if (!init_extent_tree && repair && (!cur_err || fix))
10005                         clear_extent_dirty(root->fs_info->excluded_extents,
10006                                            rec->start,
10007                                            rec->start + rec->max_size - 1);
10008                 free(rec);
10009         }
10010 repair_abort:
10011         if (repair) {
10012                 if (ret && ret != -EAGAIN) {
10013                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10014                         exit(1);
10015                 } else if (!ret) {
10016                         struct btrfs_trans_handle *trans;
10017
10018                         root = root->fs_info->extent_root;
10019                         trans = btrfs_start_transaction(root, 1);
10020                         if (IS_ERR(trans)) {
10021                                 ret = PTR_ERR(trans);
10022                                 goto repair_abort;
10023                         }
10024
10025                         ret = btrfs_fix_block_accounting(trans, root);
10026                         if (ret)
10027                                 goto repair_abort;
10028                         ret = btrfs_commit_transaction(trans, root);
10029                         if (ret)
10030                                 goto repair_abort;
10031                 }
10032                 return ret;
10033         }
10034         return 0;
10035 }
10036
10037 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10038 {
10039         u64 stripe_size;
10040
10041         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10042                 stripe_size = length;
10043                 stripe_size /= num_stripes;
10044         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10045                 stripe_size = length * 2;
10046                 stripe_size /= num_stripes;
10047         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10048                 stripe_size = length;
10049                 stripe_size /= (num_stripes - 1);
10050         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10051                 stripe_size = length;
10052                 stripe_size /= (num_stripes - 2);
10053         } else {
10054                 stripe_size = length;
10055         }
10056         return stripe_size;
10057 }
10058
10059 /*
10060  * Check the chunk with its block group/dev list ref:
10061  * Return 0 if all refs seems valid.
10062  * Return 1 if part of refs seems valid, need later check for rebuild ref
10063  * like missing block group and needs to search extent tree to rebuild them.
10064  * Return -1 if essential refs are missing and unable to rebuild.
10065  */
10066 static int check_chunk_refs(struct chunk_record *chunk_rec,
10067                             struct block_group_tree *block_group_cache,
10068                             struct device_extent_tree *dev_extent_cache,
10069                             int silent)
10070 {
10071         struct cache_extent *block_group_item;
10072         struct block_group_record *block_group_rec;
10073         struct cache_extent *dev_extent_item;
10074         struct device_extent_record *dev_extent_rec;
10075         u64 devid;
10076         u64 offset;
10077         u64 length;
10078         int metadump_v2 = 0;
10079         int i;
10080         int ret = 0;
10081
10082         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10083                                                chunk_rec->offset,
10084                                                chunk_rec->length);
10085         if (block_group_item) {
10086                 block_group_rec = container_of(block_group_item,
10087                                                struct block_group_record,
10088                                                cache);
10089                 if (chunk_rec->length != block_group_rec->offset ||
10090                     chunk_rec->offset != block_group_rec->objectid ||
10091                     (!metadump_v2 &&
10092                      chunk_rec->type_flags != block_group_rec->flags)) {
10093                         if (!silent)
10094                                 fprintf(stderr,
10095                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10096                                         chunk_rec->objectid,
10097                                         chunk_rec->type,
10098                                         chunk_rec->offset,
10099                                         chunk_rec->length,
10100                                         chunk_rec->offset,
10101                                         chunk_rec->type_flags,
10102                                         block_group_rec->objectid,
10103                                         block_group_rec->type,
10104                                         block_group_rec->offset,
10105                                         block_group_rec->offset,
10106                                         block_group_rec->objectid,
10107                                         block_group_rec->flags);
10108                         ret = -1;
10109                 } else {
10110                         list_del_init(&block_group_rec->list);
10111                         chunk_rec->bg_rec = block_group_rec;
10112                 }
10113         } else {
10114                 if (!silent)
10115                         fprintf(stderr,
10116                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10117                                 chunk_rec->objectid,
10118                                 chunk_rec->type,
10119                                 chunk_rec->offset,
10120                                 chunk_rec->length,
10121                                 chunk_rec->offset,
10122                                 chunk_rec->type_flags);
10123                 ret = 1;
10124         }
10125
10126         if (metadump_v2)
10127                 return ret;
10128
10129         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10130                                     chunk_rec->num_stripes);
10131         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10132                 devid = chunk_rec->stripes[i].devid;
10133                 offset = chunk_rec->stripes[i].offset;
10134                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10135                                                        devid, offset, length);
10136                 if (dev_extent_item) {
10137                         dev_extent_rec = container_of(dev_extent_item,
10138                                                 struct device_extent_record,
10139                                                 cache);
10140                         if (dev_extent_rec->objectid != devid ||
10141                             dev_extent_rec->offset != offset ||
10142                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10143                             dev_extent_rec->length != length) {
10144                                 if (!silent)
10145                                         fprintf(stderr,
10146                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10147                                                 chunk_rec->objectid,
10148                                                 chunk_rec->type,
10149                                                 chunk_rec->offset,
10150                                                 chunk_rec->stripes[i].devid,
10151                                                 chunk_rec->stripes[i].offset,
10152                                                 dev_extent_rec->objectid,
10153                                                 dev_extent_rec->offset,
10154                                                 dev_extent_rec->length);
10155                                 ret = -1;
10156                         } else {
10157                                 list_move(&dev_extent_rec->chunk_list,
10158                                           &chunk_rec->dextents);
10159                         }
10160                 } else {
10161                         if (!silent)
10162                                 fprintf(stderr,
10163                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10164                                         chunk_rec->objectid,
10165                                         chunk_rec->type,
10166                                         chunk_rec->offset,
10167                                         chunk_rec->stripes[i].devid,
10168                                         chunk_rec->stripes[i].offset);
10169                         ret = -1;
10170                 }
10171         }
10172         return ret;
10173 }
10174
10175 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10176 int check_chunks(struct cache_tree *chunk_cache,
10177                  struct block_group_tree *block_group_cache,
10178                  struct device_extent_tree *dev_extent_cache,
10179                  struct list_head *good, struct list_head *bad,
10180                  struct list_head *rebuild, int silent)
10181 {
10182         struct cache_extent *chunk_item;
10183         struct chunk_record *chunk_rec;
10184         struct block_group_record *bg_rec;
10185         struct device_extent_record *dext_rec;
10186         int err;
10187         int ret = 0;
10188
10189         chunk_item = first_cache_extent(chunk_cache);
10190         while (chunk_item) {
10191                 chunk_rec = container_of(chunk_item, struct chunk_record,
10192                                          cache);
10193                 err = check_chunk_refs(chunk_rec, block_group_cache,
10194                                        dev_extent_cache, silent);
10195                 if (err < 0)
10196                         ret = err;
10197                 if (err == 0 && good)
10198                         list_add_tail(&chunk_rec->list, good);
10199                 if (err > 0 && rebuild)
10200                         list_add_tail(&chunk_rec->list, rebuild);
10201                 if (err < 0 && bad)
10202                         list_add_tail(&chunk_rec->list, bad);
10203                 chunk_item = next_cache_extent(chunk_item);
10204         }
10205
10206         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10207                 if (!silent)
10208                         fprintf(stderr,
10209                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10210                                 bg_rec->objectid,
10211                                 bg_rec->offset,
10212                                 bg_rec->flags);
10213                 if (!ret)
10214                         ret = 1;
10215         }
10216
10217         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10218                             chunk_list) {
10219                 if (!silent)
10220                         fprintf(stderr,
10221                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10222                                 dext_rec->objectid,
10223                                 dext_rec->offset,
10224                                 dext_rec->length);
10225                 if (!ret)
10226                         ret = 1;
10227         }
10228         return ret;
10229 }
10230
10231
10232 static int check_device_used(struct device_record *dev_rec,
10233                              struct device_extent_tree *dext_cache)
10234 {
10235         struct cache_extent *cache;
10236         struct device_extent_record *dev_extent_rec;
10237         u64 total_byte = 0;
10238
10239         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10240         while (cache) {
10241                 dev_extent_rec = container_of(cache,
10242                                               struct device_extent_record,
10243                                               cache);
10244                 if (dev_extent_rec->objectid != dev_rec->devid)
10245                         break;
10246
10247                 list_del_init(&dev_extent_rec->device_list);
10248                 total_byte += dev_extent_rec->length;
10249                 cache = next_cache_extent(cache);
10250         }
10251
10252         if (total_byte != dev_rec->byte_used) {
10253                 fprintf(stderr,
10254                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10255                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10256                         dev_rec->type, dev_rec->offset);
10257                 return -1;
10258         } else {
10259                 return 0;
10260         }
10261 }
10262
10263 /* check btrfs_dev_item -> btrfs_dev_extent */
10264 static int check_devices(struct rb_root *dev_cache,
10265                          struct device_extent_tree *dev_extent_cache)
10266 {
10267         struct rb_node *dev_node;
10268         struct device_record *dev_rec;
10269         struct device_extent_record *dext_rec;
10270         int err;
10271         int ret = 0;
10272
10273         dev_node = rb_first(dev_cache);
10274         while (dev_node) {
10275                 dev_rec = container_of(dev_node, struct device_record, node);
10276                 err = check_device_used(dev_rec, dev_extent_cache);
10277                 if (err)
10278                         ret = err;
10279
10280                 dev_node = rb_next(dev_node);
10281         }
10282         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10283                             device_list) {
10284                 fprintf(stderr,
10285                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10286                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10287                 if (!ret)
10288                         ret = 1;
10289         }
10290         return ret;
10291 }
10292
10293 static int add_root_item_to_list(struct list_head *head,
10294                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10295                                   u8 level, u8 drop_level,
10296                                   struct btrfs_key *drop_key)
10297 {
10298
10299         struct root_item_record *ri_rec;
10300         ri_rec = malloc(sizeof(*ri_rec));
10301         if (!ri_rec)
10302                 return -ENOMEM;
10303         ri_rec->bytenr = bytenr;
10304         ri_rec->objectid = objectid;
10305         ri_rec->level = level;
10306         ri_rec->drop_level = drop_level;
10307         ri_rec->last_snapshot = last_snapshot;
10308         if (drop_key)
10309                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10310         list_add_tail(&ri_rec->list, head);
10311
10312         return 0;
10313 }
10314
10315 static void free_root_item_list(struct list_head *list)
10316 {
10317         struct root_item_record *ri_rec;
10318
10319         while (!list_empty(list)) {
10320                 ri_rec = list_first_entry(list, struct root_item_record,
10321                                           list);
10322                 list_del_init(&ri_rec->list);
10323                 free(ri_rec);
10324         }
10325 }
10326
10327 static int deal_root_from_list(struct list_head *list,
10328                                struct btrfs_root *root,
10329                                struct block_info *bits,
10330                                int bits_nr,
10331                                struct cache_tree *pending,
10332                                struct cache_tree *seen,
10333                                struct cache_tree *reada,
10334                                struct cache_tree *nodes,
10335                                struct cache_tree *extent_cache,
10336                                struct cache_tree *chunk_cache,
10337                                struct rb_root *dev_cache,
10338                                struct block_group_tree *block_group_cache,
10339                                struct device_extent_tree *dev_extent_cache)
10340 {
10341         int ret = 0;
10342         u64 last;
10343
10344         while (!list_empty(list)) {
10345                 struct root_item_record *rec;
10346                 struct extent_buffer *buf;
10347                 rec = list_entry(list->next,
10348                                  struct root_item_record, list);
10349                 last = 0;
10350                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10351                 if (!extent_buffer_uptodate(buf)) {
10352                         free_extent_buffer(buf);
10353                         ret = -EIO;
10354                         break;
10355                 }
10356                 ret = add_root_to_pending(buf, extent_cache, pending,
10357                                     seen, nodes, rec->objectid);
10358                 if (ret < 0)
10359                         break;
10360                 /*
10361                  * To rebuild extent tree, we need deal with snapshot
10362                  * one by one, otherwise we deal with node firstly which
10363                  * can maximize readahead.
10364                  */
10365                 while (1) {
10366                         ret = run_next_block(root, bits, bits_nr, &last,
10367                                              pending, seen, reada, nodes,
10368                                              extent_cache, chunk_cache,
10369                                              dev_cache, block_group_cache,
10370                                              dev_extent_cache, rec);
10371                         if (ret != 0)
10372                                 break;
10373                 }
10374                 free_extent_buffer(buf);
10375                 list_del(&rec->list);
10376                 free(rec);
10377                 if (ret < 0)
10378                         break;
10379         }
10380         while (ret >= 0) {
10381                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10382                                      reada, nodes, extent_cache, chunk_cache,
10383                                      dev_cache, block_group_cache,
10384                                      dev_extent_cache, NULL);
10385                 if (ret != 0) {
10386                         if (ret > 0)
10387                                 ret = 0;
10388                         break;
10389                 }
10390         }
10391         return ret;
10392 }
10393
10394 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10395 {
10396         struct rb_root dev_cache;
10397         struct cache_tree chunk_cache;
10398         struct block_group_tree block_group_cache;
10399         struct device_extent_tree dev_extent_cache;
10400         struct cache_tree extent_cache;
10401         struct cache_tree seen;
10402         struct cache_tree pending;
10403         struct cache_tree reada;
10404         struct cache_tree nodes;
10405         struct extent_io_tree excluded_extents;
10406         struct cache_tree corrupt_blocks;
10407         struct btrfs_path path;
10408         struct btrfs_key key;
10409         struct btrfs_key found_key;
10410         int ret, err = 0;
10411         struct block_info *bits;
10412         int bits_nr;
10413         struct extent_buffer *leaf;
10414         int slot;
10415         struct btrfs_root_item ri;
10416         struct list_head dropping_trees;
10417         struct list_head normal_trees;
10418         struct btrfs_root *root1;
10419         struct btrfs_root *root;
10420         u64 objectid;
10421         u8 level;
10422
10423         root = fs_info->fs_root;
10424         dev_cache = RB_ROOT;
10425         cache_tree_init(&chunk_cache);
10426         block_group_tree_init(&block_group_cache);
10427         device_extent_tree_init(&dev_extent_cache);
10428
10429         cache_tree_init(&extent_cache);
10430         cache_tree_init(&seen);
10431         cache_tree_init(&pending);
10432         cache_tree_init(&nodes);
10433         cache_tree_init(&reada);
10434         cache_tree_init(&corrupt_blocks);
10435         extent_io_tree_init(&excluded_extents);
10436         INIT_LIST_HEAD(&dropping_trees);
10437         INIT_LIST_HEAD(&normal_trees);
10438
10439         if (repair) {
10440                 fs_info->excluded_extents = &excluded_extents;
10441                 fs_info->fsck_extent_cache = &extent_cache;
10442                 fs_info->free_extent_hook = free_extent_hook;
10443                 fs_info->corrupt_blocks = &corrupt_blocks;
10444         }
10445
10446         bits_nr = 1024;
10447         bits = malloc(bits_nr * sizeof(struct block_info));
10448         if (!bits) {
10449                 perror("malloc");
10450                 exit(1);
10451         }
10452
10453         if (ctx.progress_enabled) {
10454                 ctx.tp = TASK_EXTENTS;
10455                 task_start(ctx.info);
10456         }
10457
10458 again:
10459         root1 = fs_info->tree_root;
10460         level = btrfs_header_level(root1->node);
10461         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10462                                     root1->node->start, 0, level, 0, NULL);
10463         if (ret < 0)
10464                 goto out;
10465         root1 = fs_info->chunk_root;
10466         level = btrfs_header_level(root1->node);
10467         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10468                                     root1->node->start, 0, level, 0, NULL);
10469         if (ret < 0)
10470                 goto out;
10471         btrfs_init_path(&path);
10472         key.offset = 0;
10473         key.objectid = 0;
10474         key.type = BTRFS_ROOT_ITEM_KEY;
10475         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10476         if (ret < 0)
10477                 goto out;
10478         while(1) {
10479                 leaf = path.nodes[0];
10480                 slot = path.slots[0];
10481                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10482                         ret = btrfs_next_leaf(root, &path);
10483                         if (ret != 0)
10484                                 break;
10485                         leaf = path.nodes[0];
10486                         slot = path.slots[0];
10487                 }
10488                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10489                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10490                         unsigned long offset;
10491                         u64 last_snapshot;
10492
10493                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10494                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10495                         last_snapshot = btrfs_root_last_snapshot(&ri);
10496                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10497                                 level = btrfs_root_level(&ri);
10498                                 ret = add_root_item_to_list(&normal_trees,
10499                                                 found_key.objectid,
10500                                                 btrfs_root_bytenr(&ri),
10501                                                 last_snapshot, level,
10502                                                 0, NULL);
10503                                 if (ret < 0)
10504                                         goto out;
10505                         } else {
10506                                 level = btrfs_root_level(&ri);
10507                                 objectid = found_key.objectid;
10508                                 btrfs_disk_key_to_cpu(&found_key,
10509                                                       &ri.drop_progress);
10510                                 ret = add_root_item_to_list(&dropping_trees,
10511                                                 objectid,
10512                                                 btrfs_root_bytenr(&ri),
10513                                                 last_snapshot, level,
10514                                                 ri.drop_level, &found_key);
10515                                 if (ret < 0)
10516                                         goto out;
10517                         }
10518                 }
10519                 path.slots[0]++;
10520         }
10521         btrfs_release_path(&path);
10522
10523         /*
10524          * check_block can return -EAGAIN if it fixes something, please keep
10525          * this in mind when dealing with return values from these functions, if
10526          * we get -EAGAIN we want to fall through and restart the loop.
10527          */
10528         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10529                                   &seen, &reada, &nodes, &extent_cache,
10530                                   &chunk_cache, &dev_cache, &block_group_cache,
10531                                   &dev_extent_cache);
10532         if (ret < 0) {
10533                 if (ret == -EAGAIN)
10534                         goto loop;
10535                 goto out;
10536         }
10537         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10538                                   &pending, &seen, &reada, &nodes,
10539                                   &extent_cache, &chunk_cache, &dev_cache,
10540                                   &block_group_cache, &dev_extent_cache);
10541         if (ret < 0) {
10542                 if (ret == -EAGAIN)
10543                         goto loop;
10544                 goto out;
10545         }
10546
10547         ret = check_chunks(&chunk_cache, &block_group_cache,
10548                            &dev_extent_cache, NULL, NULL, NULL, 0);
10549         if (ret) {
10550                 if (ret == -EAGAIN)
10551                         goto loop;
10552                 err = ret;
10553         }
10554
10555         ret = check_extent_refs(root, &extent_cache);
10556         if (ret < 0) {
10557                 if (ret == -EAGAIN)
10558                         goto loop;
10559                 goto out;
10560         }
10561
10562         ret = check_devices(&dev_cache, &dev_extent_cache);
10563         if (ret && err)
10564                 ret = err;
10565
10566 out:
10567         task_stop(ctx.info);
10568         if (repair) {
10569                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10570                 extent_io_tree_cleanup(&excluded_extents);
10571                 fs_info->fsck_extent_cache = NULL;
10572                 fs_info->free_extent_hook = NULL;
10573                 fs_info->corrupt_blocks = NULL;
10574                 fs_info->excluded_extents = NULL;
10575         }
10576         free(bits);
10577         free_chunk_cache_tree(&chunk_cache);
10578         free_device_cache_tree(&dev_cache);
10579         free_block_group_tree(&block_group_cache);
10580         free_device_extent_tree(&dev_extent_cache);
10581         free_extent_cache_tree(&seen);
10582         free_extent_cache_tree(&pending);
10583         free_extent_cache_tree(&reada);
10584         free_extent_cache_tree(&nodes);
10585         free_root_item_list(&normal_trees);
10586         free_root_item_list(&dropping_trees);
10587         return ret;
10588 loop:
10589         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10590         free_extent_cache_tree(&seen);
10591         free_extent_cache_tree(&pending);
10592         free_extent_cache_tree(&reada);
10593         free_extent_cache_tree(&nodes);
10594         free_chunk_cache_tree(&chunk_cache);
10595         free_block_group_tree(&block_group_cache);
10596         free_device_cache_tree(&dev_cache);
10597         free_device_extent_tree(&dev_extent_cache);
10598         free_extent_record_cache(&extent_cache);
10599         free_root_item_list(&normal_trees);
10600         free_root_item_list(&dropping_trees);
10601         extent_io_tree_cleanup(&excluded_extents);
10602         goto again;
10603 }
10604
10605 /*
10606  * Check backrefs of a tree block given by @bytenr or @eb.
10607  *
10608  * @root:       the root containing the @bytenr or @eb
10609  * @eb:         tree block extent buffer, can be NULL
10610  * @bytenr:     bytenr of the tree block to search
10611  * @level:      tree level of the tree block
10612  * @owner:      owner of the tree block
10613  *
10614  * Return >0 for any error found and output error message
10615  * Return 0 for no error found
10616  */
10617 static int check_tree_block_ref(struct btrfs_root *root,
10618                                 struct extent_buffer *eb, u64 bytenr,
10619                                 int level, u64 owner)
10620 {
10621         struct btrfs_key key;
10622         struct btrfs_root *extent_root = root->fs_info->extent_root;
10623         struct btrfs_path path;
10624         struct btrfs_extent_item *ei;
10625         struct btrfs_extent_inline_ref *iref;
10626         struct extent_buffer *leaf;
10627         unsigned long end;
10628         unsigned long ptr;
10629         int slot;
10630         int skinny_level;
10631         int type;
10632         u32 nodesize = root->fs_info->nodesize;
10633         u32 item_size;
10634         u64 offset;
10635         int tree_reloc_root = 0;
10636         int found_ref = 0;
10637         int err = 0;
10638         int ret;
10639
10640         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10641             btrfs_header_bytenr(root->node) == bytenr)
10642                 tree_reloc_root = 1;
10643
10644         btrfs_init_path(&path);
10645         key.objectid = bytenr;
10646         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10647                 key.type = BTRFS_METADATA_ITEM_KEY;
10648         else
10649                 key.type = BTRFS_EXTENT_ITEM_KEY;
10650         key.offset = (u64)-1;
10651
10652         /* Search for the backref in extent tree */
10653         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10654         if (ret < 0) {
10655                 err |= BACKREF_MISSING;
10656                 goto out;
10657         }
10658         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10659         if (ret) {
10660                 err |= BACKREF_MISSING;
10661                 goto out;
10662         }
10663
10664         leaf = path.nodes[0];
10665         slot = path.slots[0];
10666         btrfs_item_key_to_cpu(leaf, &key, slot);
10667
10668         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10669
10670         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10671                 skinny_level = (int)key.offset;
10672                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10673         } else {
10674                 struct btrfs_tree_block_info *info;
10675
10676                 info = (struct btrfs_tree_block_info *)(ei + 1);
10677                 skinny_level = btrfs_tree_block_level(leaf, info);
10678                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10679         }
10680
10681         if (eb) {
10682                 u64 header_gen;
10683                 u64 extent_gen;
10684
10685                 if (!(btrfs_extent_flags(leaf, ei) &
10686                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10687                         error(
10688                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10689                                 key.objectid, nodesize,
10690                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10691                         err = BACKREF_MISMATCH;
10692                 }
10693                 header_gen = btrfs_header_generation(eb);
10694                 extent_gen = btrfs_extent_generation(leaf, ei);
10695                 if (header_gen != extent_gen) {
10696                         error(
10697         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10698                                 key.objectid, nodesize, header_gen,
10699                                 extent_gen);
10700                         err = BACKREF_MISMATCH;
10701                 }
10702                 if (level != skinny_level) {
10703                         error(
10704                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10705                                 key.objectid, nodesize, level, skinny_level);
10706                         err = BACKREF_MISMATCH;
10707                 }
10708                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10709                         error(
10710                         "extent[%llu %u] is referred by other roots than %llu",
10711                                 key.objectid, nodesize, root->objectid);
10712                         err = BACKREF_MISMATCH;
10713                 }
10714         }
10715
10716         /*
10717          * Iterate the extent/metadata item to find the exact backref
10718          */
10719         item_size = btrfs_item_size_nr(leaf, slot);
10720         ptr = (unsigned long)iref;
10721         end = (unsigned long)ei + item_size;
10722         while (ptr < end) {
10723                 iref = (struct btrfs_extent_inline_ref *)ptr;
10724                 type = btrfs_extent_inline_ref_type(leaf, iref);
10725                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10726
10727                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10728                         (offset == root->objectid || offset == owner)) {
10729                         found_ref = 1;
10730                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10731                         /*
10732                          * Backref of tree reloc root points to itself, no need
10733                          * to check backref any more.
10734                          */
10735                         if (tree_reloc_root)
10736                                 found_ref = 1;
10737                         else
10738                         /* Check if the backref points to valid referencer */
10739                                 found_ref = !check_tree_block_ref(root, NULL,
10740                                                 offset, level + 1, owner);
10741                 }
10742
10743                 if (found_ref)
10744                         break;
10745                 ptr += btrfs_extent_inline_ref_size(type);
10746         }
10747
10748         /*
10749          * Inlined extent item doesn't have what we need, check
10750          * TREE_BLOCK_REF_KEY
10751          */
10752         if (!found_ref) {
10753                 btrfs_release_path(&path);
10754                 key.objectid = bytenr;
10755                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10756                 key.offset = root->objectid;
10757
10758                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10759                 if (!ret)
10760                         found_ref = 1;
10761         }
10762         if (!found_ref)
10763                 err |= BACKREF_MISSING;
10764 out:
10765         btrfs_release_path(&path);
10766         if (eb && (err & BACKREF_MISSING))
10767                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10768                         bytenr, nodesize, owner, level);
10769         return err;
10770 }
10771
10772 /*
10773  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10774  *
10775  * Return >0 any error found and output error message
10776  * Return 0 for no error found
10777  */
10778 static int check_extent_data_item(struct btrfs_root *root,
10779                                   struct extent_buffer *eb, int slot)
10780 {
10781         struct btrfs_file_extent_item *fi;
10782         struct btrfs_path path;
10783         struct btrfs_root *extent_root = root->fs_info->extent_root;
10784         struct btrfs_key fi_key;
10785         struct btrfs_key dbref_key;
10786         struct extent_buffer *leaf;
10787         struct btrfs_extent_item *ei;
10788         struct btrfs_extent_inline_ref *iref;
10789         struct btrfs_extent_data_ref *dref;
10790         u64 owner;
10791         u64 disk_bytenr;
10792         u64 disk_num_bytes;
10793         u64 extent_num_bytes;
10794         u64 extent_flags;
10795         u32 item_size;
10796         unsigned long end;
10797         unsigned long ptr;
10798         int type;
10799         u64 ref_root;
10800         int found_dbackref = 0;
10801         int err = 0;
10802         int ret;
10803
10804         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10805         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10806
10807         /* Nothing to check for hole and inline data extents */
10808         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10809             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10810                 return 0;
10811
10812         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10813         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10814         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10815
10816         /* Check unaligned disk_num_bytes and num_bytes */
10817         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10818                 error(
10819 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10820                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10821                         root->fs_info->sectorsize);
10822                 err |= BYTES_UNALIGNED;
10823         } else {
10824                 data_bytes_allocated += disk_num_bytes;
10825         }
10826         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10827                 error(
10828 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10829                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10830                         root->fs_info->sectorsize);
10831                 err |= BYTES_UNALIGNED;
10832         } else {
10833                 data_bytes_referenced += extent_num_bytes;
10834         }
10835         owner = btrfs_header_owner(eb);
10836
10837         /* Check the extent item of the file extent in extent tree */
10838         btrfs_init_path(&path);
10839         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10840         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10841         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10842
10843         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10844         if (ret)
10845                 goto out;
10846
10847         leaf = path.nodes[0];
10848         slot = path.slots[0];
10849         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10850
10851         extent_flags = btrfs_extent_flags(leaf, ei);
10852
10853         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10854                 error(
10855                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10856                     disk_bytenr, disk_num_bytes,
10857                     BTRFS_EXTENT_FLAG_DATA);
10858                 err |= BACKREF_MISMATCH;
10859         }
10860
10861         /* Check data backref inside that extent item */
10862         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10863         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10864         ptr = (unsigned long)iref;
10865         end = (unsigned long)ei + item_size;
10866         while (ptr < end) {
10867                 iref = (struct btrfs_extent_inline_ref *)ptr;
10868                 type = btrfs_extent_inline_ref_type(leaf, iref);
10869                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10870
10871                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10872                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10873                         if (ref_root == owner || ref_root == root->objectid)
10874                                 found_dbackref = 1;
10875                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10876                         found_dbackref = !check_tree_block_ref(root, NULL,
10877                                 btrfs_extent_inline_ref_offset(leaf, iref),
10878                                 0, owner);
10879                 }
10880
10881                 if (found_dbackref)
10882                         break;
10883                 ptr += btrfs_extent_inline_ref_size(type);
10884         }
10885
10886         if (!found_dbackref) {
10887                 btrfs_release_path(&path);
10888
10889                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10890                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10891                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10892                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10893                                 fi_key.objectid, fi_key.offset);
10894
10895                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10896                                         &dbref_key, &path, 0, 0);
10897                 if (!ret) {
10898                         found_dbackref = 1;
10899                         goto out;
10900                 }
10901
10902                 btrfs_release_path(&path);
10903
10904                 /*
10905                  * Neither inlined nor EXTENT_DATA_REF found, try
10906                  * SHARED_DATA_REF as last chance.
10907                  */
10908                 dbref_key.objectid = disk_bytenr;
10909                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10910                 dbref_key.offset = eb->start;
10911
10912                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10913                                         &dbref_key, &path, 0, 0);
10914                 if (!ret) {
10915                         found_dbackref = 1;
10916                         goto out;
10917                 }
10918         }
10919
10920 out:
10921         if (!found_dbackref)
10922                 err |= BACKREF_MISSING;
10923         btrfs_release_path(&path);
10924         if (err & BACKREF_MISSING) {
10925                 error("data extent[%llu %llu] backref lost",
10926                       disk_bytenr, disk_num_bytes);
10927         }
10928         return err;
10929 }
10930
10931 /*
10932  * Get real tree block level for the case like shared block
10933  * Return >= 0 as tree level
10934  * Return <0 for error
10935  */
10936 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10937 {
10938         struct extent_buffer *eb;
10939         struct btrfs_path path;
10940         struct btrfs_key key;
10941         struct btrfs_extent_item *ei;
10942         u64 flags;
10943         u64 transid;
10944         u8 backref_level;
10945         u8 header_level;
10946         int ret;
10947
10948         /* Search extent tree for extent generation and level */
10949         key.objectid = bytenr;
10950         key.type = BTRFS_METADATA_ITEM_KEY;
10951         key.offset = (u64)-1;
10952
10953         btrfs_init_path(&path);
10954         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10955         if (ret < 0)
10956                 goto release_out;
10957         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10958         if (ret < 0)
10959                 goto release_out;
10960         if (ret > 0) {
10961                 ret = -ENOENT;
10962                 goto release_out;
10963         }
10964
10965         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10966         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10967                             struct btrfs_extent_item);
10968         flags = btrfs_extent_flags(path.nodes[0], ei);
10969         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10970                 ret = -ENOENT;
10971                 goto release_out;
10972         }
10973
10974         /* Get transid for later read_tree_block() check */
10975         transid = btrfs_extent_generation(path.nodes[0], ei);
10976
10977         /* Get backref level as one source */
10978         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10979                 backref_level = key.offset;
10980         } else {
10981                 struct btrfs_tree_block_info *info;
10982
10983                 info = (struct btrfs_tree_block_info *)(ei + 1);
10984                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10985         }
10986         btrfs_release_path(&path);
10987
10988         /* Get level from tree block as an alternative source */
10989         eb = read_tree_block(fs_info, bytenr, transid);
10990         if (!extent_buffer_uptodate(eb)) {
10991                 free_extent_buffer(eb);
10992                 return -EIO;
10993         }
10994         header_level = btrfs_header_level(eb);
10995         free_extent_buffer(eb);
10996
10997         if (header_level != backref_level)
10998                 return -EIO;
10999         return header_level;
11000
11001 release_out:
11002         btrfs_release_path(&path);
11003         return ret;
11004 }
11005
11006 /*
11007  * Check if a tree block backref is valid (points to a valid tree block)
11008  * if level == -1, level will be resolved
11009  * Return >0 for any error found and print error message
11010  */
11011 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11012                                     u64 bytenr, int level)
11013 {
11014         struct btrfs_root *root;
11015         struct btrfs_key key;
11016         struct btrfs_path path;
11017         struct extent_buffer *eb;
11018         struct extent_buffer *node;
11019         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11020         int err = 0;
11021         int ret;
11022
11023         /* Query level for level == -1 special case */
11024         if (level == -1)
11025                 level = query_tree_block_level(fs_info, bytenr);
11026         if (level < 0) {
11027                 err |= REFERENCER_MISSING;
11028                 goto out;
11029         }
11030
11031         key.objectid = root_id;
11032         key.type = BTRFS_ROOT_ITEM_KEY;
11033         key.offset = (u64)-1;
11034
11035         root = btrfs_read_fs_root(fs_info, &key);
11036         if (IS_ERR(root)) {
11037                 err |= REFERENCER_MISSING;
11038                 goto out;
11039         }
11040
11041         /* Read out the tree block to get item/node key */
11042         eb = read_tree_block(fs_info, bytenr, 0);
11043         if (!extent_buffer_uptodate(eb)) {
11044                 err |= REFERENCER_MISSING;
11045                 free_extent_buffer(eb);
11046                 goto out;
11047         }
11048
11049         /* Empty tree, no need to check key */
11050         if (!btrfs_header_nritems(eb) && !level) {
11051                 free_extent_buffer(eb);
11052                 goto out;
11053         }
11054
11055         if (level)
11056                 btrfs_node_key_to_cpu(eb, &key, 0);
11057         else
11058                 btrfs_item_key_to_cpu(eb, &key, 0);
11059
11060         free_extent_buffer(eb);
11061
11062         btrfs_init_path(&path);
11063         path.lowest_level = level;
11064         /* Search with the first key, to ensure we can reach it */
11065         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11066         if (ret < 0) {
11067                 err |= REFERENCER_MISSING;
11068                 goto release_out;
11069         }
11070
11071         node = path.nodes[level];
11072         if (btrfs_header_bytenr(node) != bytenr) {
11073                 error(
11074         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11075                         bytenr, nodesize, bytenr,
11076                         btrfs_header_bytenr(node));
11077                 err |= REFERENCER_MISMATCH;
11078         }
11079         if (btrfs_header_level(node) != level) {
11080                 error(
11081         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11082                         bytenr, nodesize, level,
11083                         btrfs_header_level(node));
11084                 err |= REFERENCER_MISMATCH;
11085         }
11086
11087 release_out:
11088         btrfs_release_path(&path);
11089 out:
11090         if (err & REFERENCER_MISSING) {
11091                 if (level < 0)
11092                         error("extent [%llu %d] lost referencer (owner: %llu)",
11093                                 bytenr, nodesize, root_id);
11094                 else
11095                         error(
11096                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11097                                 bytenr, nodesize, root_id, level);
11098         }
11099
11100         return err;
11101 }
11102
11103 /*
11104  * Check if tree block @eb is tree reloc root.
11105  * Return 0 if it's not or any problem happens
11106  * Return 1 if it's a tree reloc root
11107  */
11108 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11109                                  struct extent_buffer *eb)
11110 {
11111         struct btrfs_root *tree_reloc_root;
11112         struct btrfs_key key;
11113         u64 bytenr = btrfs_header_bytenr(eb);
11114         u64 owner = btrfs_header_owner(eb);
11115         int ret = 0;
11116
11117         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11118         key.offset = owner;
11119         key.type = BTRFS_ROOT_ITEM_KEY;
11120
11121         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11122         if (IS_ERR(tree_reloc_root))
11123                 return 0;
11124
11125         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11126                 ret = 1;
11127         btrfs_free_fs_root(tree_reloc_root);
11128         return ret;
11129 }
11130
11131 /*
11132  * Check referencer for shared block backref
11133  * If level == -1, this function will resolve the level.
11134  */
11135 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11136                                      u64 parent, u64 bytenr, int level)
11137 {
11138         struct extent_buffer *eb;
11139         u32 nr;
11140         int found_parent = 0;
11141         int i;
11142
11143         eb = read_tree_block(fs_info, parent, 0);
11144         if (!extent_buffer_uptodate(eb))
11145                 goto out;
11146
11147         if (level == -1)
11148                 level = query_tree_block_level(fs_info, bytenr);
11149         if (level < 0)
11150                 goto out;
11151
11152         /* It's possible it's a tree reloc root */
11153         if (parent == bytenr) {
11154                 if (is_tree_reloc_root(fs_info, eb))
11155                         found_parent = 1;
11156                 goto out;
11157         }
11158
11159         if (level + 1 != btrfs_header_level(eb))
11160                 goto out;
11161
11162         nr = btrfs_header_nritems(eb);
11163         for (i = 0; i < nr; i++) {
11164                 if (bytenr == btrfs_node_blockptr(eb, i)) {
11165                         found_parent = 1;
11166                         break;
11167                 }
11168         }
11169 out:
11170         free_extent_buffer(eb);
11171         if (!found_parent) {
11172                 error(
11173         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11174                         bytenr, fs_info->nodesize, parent, level);
11175                 return REFERENCER_MISSING;
11176         }
11177         return 0;
11178 }
11179
11180 /*
11181  * Check referencer for normal (inlined) data ref
11182  * If len == 0, it will be resolved by searching in extent tree
11183  */
11184 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11185                                      u64 root_id, u64 objectid, u64 offset,
11186                                      u64 bytenr, u64 len, u32 count)
11187 {
11188         struct btrfs_root *root;
11189         struct btrfs_root *extent_root = fs_info->extent_root;
11190         struct btrfs_key key;
11191         struct btrfs_path path;
11192         struct extent_buffer *leaf;
11193         struct btrfs_file_extent_item *fi;
11194         u32 found_count = 0;
11195         int slot;
11196         int ret = 0;
11197
11198         if (!len) {
11199                 key.objectid = bytenr;
11200                 key.type = BTRFS_EXTENT_ITEM_KEY;
11201                 key.offset = (u64)-1;
11202
11203                 btrfs_init_path(&path);
11204                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11205                 if (ret < 0)
11206                         goto out;
11207                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11208                 if (ret)
11209                         goto out;
11210                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11211                 if (key.objectid != bytenr ||
11212                     key.type != BTRFS_EXTENT_ITEM_KEY)
11213                         goto out;
11214                 len = key.offset;
11215                 btrfs_release_path(&path);
11216         }
11217         key.objectid = root_id;
11218         key.type = BTRFS_ROOT_ITEM_KEY;
11219         key.offset = (u64)-1;
11220         btrfs_init_path(&path);
11221
11222         root = btrfs_read_fs_root(fs_info, &key);
11223         if (IS_ERR(root))
11224                 goto out;
11225
11226         key.objectid = objectid;
11227         key.type = BTRFS_EXTENT_DATA_KEY;
11228         /*
11229          * It can be nasty as data backref offset is
11230          * file offset - file extent offset, which is smaller or
11231          * equal to original backref offset.  The only special case is
11232          * overflow.  So we need to special check and do further search.
11233          */
11234         key.offset = offset & (1ULL << 63) ? 0 : offset;
11235
11236         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11237         if (ret < 0)
11238                 goto out;
11239
11240         /*
11241          * Search afterwards to get correct one
11242          * NOTE: As we must do a comprehensive check on the data backref to
11243          * make sure the dref count also matches, we must iterate all file
11244          * extents for that inode.
11245          */
11246         while (1) {
11247                 leaf = path.nodes[0];
11248                 slot = path.slots[0];
11249
11250                 if (slot >= btrfs_header_nritems(leaf))
11251                         goto next;
11252                 btrfs_item_key_to_cpu(leaf, &key, slot);
11253                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11254                         break;
11255                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11256                 /*
11257                  * Except normal disk bytenr and disk num bytes, we still
11258                  * need to do extra check on dbackref offset as
11259                  * dbackref offset = file_offset - file_extent_offset
11260                  */
11261                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11262                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11263                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11264                     offset)
11265                         found_count++;
11266
11267 next:
11268                 ret = btrfs_next_item(root, &path);
11269                 if (ret)
11270                         break;
11271         }
11272 out:
11273         btrfs_release_path(&path);
11274         if (found_count != count) {
11275                 error(
11276 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11277                         bytenr, len, root_id, objectid, offset, count, found_count);
11278                 return REFERENCER_MISSING;
11279         }
11280         return 0;
11281 }
11282
11283 /*
11284  * Check if the referencer of a shared data backref exists
11285  */
11286 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11287                                      u64 parent, u64 bytenr)
11288 {
11289         struct extent_buffer *eb;
11290         struct btrfs_key key;
11291         struct btrfs_file_extent_item *fi;
11292         u32 nr;
11293         int found_parent = 0;
11294         int i;
11295
11296         eb = read_tree_block(fs_info, parent, 0);
11297         if (!extent_buffer_uptodate(eb))
11298                 goto out;
11299
11300         nr = btrfs_header_nritems(eb);
11301         for (i = 0; i < nr; i++) {
11302                 btrfs_item_key_to_cpu(eb, &key, i);
11303                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11304                         continue;
11305
11306                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11307                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11308                         continue;
11309
11310                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11311                         found_parent = 1;
11312                         break;
11313                 }
11314         }
11315
11316 out:
11317         free_extent_buffer(eb);
11318         if (!found_parent) {
11319                 error("shared extent %llu referencer lost (parent: %llu)",
11320                         bytenr, parent);
11321                 return REFERENCER_MISSING;
11322         }
11323         return 0;
11324 }
11325
11326 /*
11327  * This function will check a given extent item, including its backref and
11328  * itself (like crossing stripe boundary and type)
11329  *
11330  * Since we don't use extent_record anymore, introduce new error bit
11331  */
11332 static int check_extent_item(struct btrfs_fs_info *fs_info,
11333                              struct extent_buffer *eb, int slot)
11334 {
11335         struct btrfs_extent_item *ei;
11336         struct btrfs_extent_inline_ref *iref;
11337         struct btrfs_extent_data_ref *dref;
11338         unsigned long end;
11339         unsigned long ptr;
11340         int type;
11341         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11342         u32 item_size = btrfs_item_size_nr(eb, slot);
11343         u64 flags;
11344         u64 offset;
11345         int metadata = 0;
11346         int level;
11347         struct btrfs_key key;
11348         int ret;
11349         int err = 0;
11350
11351         btrfs_item_key_to_cpu(eb, &key, slot);
11352         if (key.type == BTRFS_EXTENT_ITEM_KEY)
11353                 bytes_used += key.offset;
11354         else
11355                 bytes_used += nodesize;
11356
11357         if (item_size < sizeof(*ei)) {
11358                 /*
11359                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11360                  * old thing when on disk format is still un-determined.
11361                  * No need to care about it anymore
11362                  */
11363                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11364                 return -ENOTTY;
11365         }
11366
11367         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11368         flags = btrfs_extent_flags(eb, ei);
11369
11370         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11371                 metadata = 1;
11372         if (metadata && check_crossing_stripes(global_info, key.objectid,
11373                                                eb->len)) {
11374                 error("bad metadata [%llu, %llu) crossing stripe boundary",
11375                       key.objectid, key.objectid + nodesize);
11376                 err |= CROSSING_STRIPE_BOUNDARY;
11377         }
11378
11379         ptr = (unsigned long)(ei + 1);
11380
11381         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11382                 /* Old EXTENT_ITEM metadata */
11383                 struct btrfs_tree_block_info *info;
11384
11385                 info = (struct btrfs_tree_block_info *)ptr;
11386                 level = btrfs_tree_block_level(eb, info);
11387                 ptr += sizeof(struct btrfs_tree_block_info);
11388         } else {
11389                 /* New METADATA_ITEM */
11390                 level = key.offset;
11391         }
11392         end = (unsigned long)ei + item_size;
11393
11394 next:
11395         /* Reached extent item end normally */
11396         if (ptr == end)
11397                 goto out;
11398
11399         /* Beyond extent item end, wrong item size */
11400         if (ptr > end) {
11401                 err |= ITEM_SIZE_MISMATCH;
11402                 error("extent item at bytenr %llu slot %d has wrong size",
11403                         eb->start, slot);
11404                 goto out;
11405         }
11406
11407         /* Now check every backref in this extent item */
11408         iref = (struct btrfs_extent_inline_ref *)ptr;
11409         type = btrfs_extent_inline_ref_type(eb, iref);
11410         offset = btrfs_extent_inline_ref_offset(eb, iref);
11411         switch (type) {
11412         case BTRFS_TREE_BLOCK_REF_KEY:
11413                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11414                                                level);
11415                 err |= ret;
11416                 break;
11417         case BTRFS_SHARED_BLOCK_REF_KEY:
11418                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11419                                                  level);
11420                 err |= ret;
11421                 break;
11422         case BTRFS_EXTENT_DATA_REF_KEY:
11423                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11424                 ret = check_extent_data_backref(fs_info,
11425                                 btrfs_extent_data_ref_root(eb, dref),
11426                                 btrfs_extent_data_ref_objectid(eb, dref),
11427                                 btrfs_extent_data_ref_offset(eb, dref),
11428                                 key.objectid, key.offset,
11429                                 btrfs_extent_data_ref_count(eb, dref));
11430                 err |= ret;
11431                 break;
11432         case BTRFS_SHARED_DATA_REF_KEY:
11433                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11434                 err |= ret;
11435                 break;
11436         default:
11437                 error("extent[%llu %d %llu] has unknown ref type: %d",
11438                         key.objectid, key.type, key.offset, type);
11439                 err |= UNKNOWN_TYPE;
11440                 goto out;
11441         }
11442
11443         ptr += btrfs_extent_inline_ref_size(type);
11444         goto next;
11445
11446 out:
11447         return err;
11448 }
11449
11450 /*
11451  * Check if a dev extent item is referred correctly by its chunk
11452  */
11453 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11454                                  struct extent_buffer *eb, int slot)
11455 {
11456         struct btrfs_root *chunk_root = fs_info->chunk_root;
11457         struct btrfs_dev_extent *ptr;
11458         struct btrfs_path path;
11459         struct btrfs_key chunk_key;
11460         struct btrfs_key devext_key;
11461         struct btrfs_chunk *chunk;
11462         struct extent_buffer *l;
11463         int num_stripes;
11464         u64 length;
11465         int i;
11466         int found_chunk = 0;
11467         int ret;
11468
11469         btrfs_item_key_to_cpu(eb, &devext_key, slot);
11470         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11471         length = btrfs_dev_extent_length(eb, ptr);
11472
11473         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11474         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11475         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11476
11477         btrfs_init_path(&path);
11478         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11479         if (ret)
11480                 goto out;
11481
11482         l = path.nodes[0];
11483         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11484         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11485                                       chunk_key.offset);
11486         if (ret < 0)
11487                 goto out;
11488
11489         if (btrfs_stripe_length(fs_info, l, chunk) != length)
11490                 goto out;
11491
11492         num_stripes = btrfs_chunk_num_stripes(l, chunk);
11493         for (i = 0; i < num_stripes; i++) {
11494                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11495                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11496
11497                 if (devid == devext_key.objectid &&
11498                     offset == devext_key.offset) {
11499                         found_chunk = 1;
11500                         break;
11501                 }
11502         }
11503 out:
11504         btrfs_release_path(&path);
11505         if (!found_chunk) {
11506                 error(
11507                 "device extent[%llu, %llu, %llu] did not find the related chunk",
11508                         devext_key.objectid, devext_key.offset, length);
11509                 return REFERENCER_MISSING;
11510         }
11511         return 0;
11512 }
11513
11514 /*
11515  * Check if the used space is correct with the dev item
11516  */
11517 static int check_dev_item(struct btrfs_fs_info *fs_info,
11518                           struct extent_buffer *eb, int slot)
11519 {
11520         struct btrfs_root *dev_root = fs_info->dev_root;
11521         struct btrfs_dev_item *dev_item;
11522         struct btrfs_path path;
11523         struct btrfs_key key;
11524         struct btrfs_dev_extent *ptr;
11525         u64 dev_id;
11526         u64 used;
11527         u64 total = 0;
11528         int ret;
11529
11530         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11531         dev_id = btrfs_device_id(eb, dev_item);
11532         used = btrfs_device_bytes_used(eb, dev_item);
11533
11534         key.objectid = dev_id;
11535         key.type = BTRFS_DEV_EXTENT_KEY;
11536         key.offset = 0;
11537
11538         btrfs_init_path(&path);
11539         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11540         if (ret < 0) {
11541                 btrfs_item_key_to_cpu(eb, &key, slot);
11542                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11543                         key.objectid, key.type, key.offset);
11544                 btrfs_release_path(&path);
11545                 return REFERENCER_MISSING;
11546         }
11547
11548         /* Iterate dev_extents to calculate the used space of a device */
11549         while (1) {
11550                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11551                         goto next;
11552
11553                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11554                 if (key.objectid > dev_id)
11555                         break;
11556                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11557                         goto next;
11558
11559                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11560                                      struct btrfs_dev_extent);
11561                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11562 next:
11563                 ret = btrfs_next_item(dev_root, &path);
11564                 if (ret)
11565                         break;
11566         }
11567         btrfs_release_path(&path);
11568
11569         if (used != total) {
11570                 btrfs_item_key_to_cpu(eb, &key, slot);
11571                 error(
11572 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11573                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11574                         BTRFS_DEV_EXTENT_KEY, dev_id);
11575                 return ACCOUNTING_MISMATCH;
11576         }
11577         return 0;
11578 }
11579
11580 /*
11581  * Check a block group item with its referener (chunk) and its used space
11582  * with extent/metadata item
11583  */
11584 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11585                                   struct extent_buffer *eb, int slot)
11586 {
11587         struct btrfs_root *extent_root = fs_info->extent_root;
11588         struct btrfs_root *chunk_root = fs_info->chunk_root;
11589         struct btrfs_block_group_item *bi;
11590         struct btrfs_block_group_item bg_item;
11591         struct btrfs_path path;
11592         struct btrfs_key bg_key;
11593         struct btrfs_key chunk_key;
11594         struct btrfs_key extent_key;
11595         struct btrfs_chunk *chunk;
11596         struct extent_buffer *leaf;
11597         struct btrfs_extent_item *ei;
11598         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11599         u64 flags;
11600         u64 bg_flags;
11601         u64 used;
11602         u64 total = 0;
11603         int ret;
11604         int err = 0;
11605
11606         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11607         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11608         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11609         used = btrfs_block_group_used(&bg_item);
11610         bg_flags = btrfs_block_group_flags(&bg_item);
11611
11612         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11613         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11614         chunk_key.offset = bg_key.objectid;
11615
11616         btrfs_init_path(&path);
11617         /* Search for the referencer chunk */
11618         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11619         if (ret) {
11620                 error(
11621                 "block group[%llu %llu] did not find the related chunk item",
11622                         bg_key.objectid, bg_key.offset);
11623                 err |= REFERENCER_MISSING;
11624         } else {
11625                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11626                                         struct btrfs_chunk);
11627                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11628                                                 bg_key.offset) {
11629                         error(
11630         "block group[%llu %llu] related chunk item length does not match",
11631                                 bg_key.objectid, bg_key.offset);
11632                         err |= REFERENCER_MISMATCH;
11633                 }
11634         }
11635         btrfs_release_path(&path);
11636
11637         /* Search from the block group bytenr */
11638         extent_key.objectid = bg_key.objectid;
11639         extent_key.type = 0;
11640         extent_key.offset = 0;
11641
11642         btrfs_init_path(&path);
11643         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11644         if (ret < 0)
11645                 goto out;
11646
11647         /* Iterate extent tree to account used space */
11648         while (1) {
11649                 leaf = path.nodes[0];
11650
11651                 /* Search slot can point to the last item beyond leaf nritems */
11652                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11653                         goto next;
11654
11655                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11656                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11657                         break;
11658
11659                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11660                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11661                         goto next;
11662                 if (extent_key.objectid < bg_key.objectid)
11663                         goto next;
11664
11665                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11666                         total += nodesize;
11667                 else
11668                         total += extent_key.offset;
11669
11670                 ei = btrfs_item_ptr(leaf, path.slots[0],
11671                                     struct btrfs_extent_item);
11672                 flags = btrfs_extent_flags(leaf, ei);
11673                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11674                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11675                                 error(
11676                         "bad extent[%llu, %llu) type mismatch with chunk",
11677                                         extent_key.objectid,
11678                                         extent_key.objectid + extent_key.offset);
11679                                 err |= CHUNK_TYPE_MISMATCH;
11680                         }
11681                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11682                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11683                                     BTRFS_BLOCK_GROUP_METADATA))) {
11684                                 error(
11685                         "bad extent[%llu, %llu) type mismatch with chunk",
11686                                         extent_key.objectid,
11687                                         extent_key.objectid + nodesize);
11688                                 err |= CHUNK_TYPE_MISMATCH;
11689                         }
11690                 }
11691 next:
11692                 ret = btrfs_next_item(extent_root, &path);
11693                 if (ret)
11694                         break;
11695         }
11696
11697 out:
11698         btrfs_release_path(&path);
11699
11700         if (total != used) {
11701                 error(
11702                 "block group[%llu %llu] used %llu but extent items used %llu",
11703                         bg_key.objectid, bg_key.offset, used, total);
11704                 err |= ACCOUNTING_MISMATCH;
11705         }
11706         return err;
11707 }
11708
11709 /*
11710  * Check a chunk item.
11711  * Including checking all referred dev_extents and block group
11712  */
11713 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11714                             struct extent_buffer *eb, int slot)
11715 {
11716         struct btrfs_root *extent_root = fs_info->extent_root;
11717         struct btrfs_root *dev_root = fs_info->dev_root;
11718         struct btrfs_path path;
11719         struct btrfs_key chunk_key;
11720         struct btrfs_key bg_key;
11721         struct btrfs_key devext_key;
11722         struct btrfs_chunk *chunk;
11723         struct extent_buffer *leaf;
11724         struct btrfs_block_group_item *bi;
11725         struct btrfs_block_group_item bg_item;
11726         struct btrfs_dev_extent *ptr;
11727         u64 length;
11728         u64 chunk_end;
11729         u64 stripe_len;
11730         u64 type;
11731         int num_stripes;
11732         u64 offset;
11733         u64 objectid;
11734         int i;
11735         int ret;
11736         int err = 0;
11737
11738         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11739         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11740         length = btrfs_chunk_length(eb, chunk);
11741         chunk_end = chunk_key.offset + length;
11742         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11743                                       chunk_key.offset);
11744         if (ret < 0) {
11745                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11746                         chunk_end);
11747                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11748                 goto out;
11749         }
11750         type = btrfs_chunk_type(eb, chunk);
11751
11752         bg_key.objectid = chunk_key.offset;
11753         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11754         bg_key.offset = length;
11755
11756         btrfs_init_path(&path);
11757         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11758         if (ret) {
11759                 error(
11760                 "chunk[%llu %llu) did not find the related block group item",
11761                         chunk_key.offset, chunk_end);
11762                 err |= REFERENCER_MISSING;
11763         } else{
11764                 leaf = path.nodes[0];
11765                 bi = btrfs_item_ptr(leaf, path.slots[0],
11766                                     struct btrfs_block_group_item);
11767                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11768                                    sizeof(bg_item));
11769                 if (btrfs_block_group_flags(&bg_item) != type) {
11770                         error(
11771 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11772                                 chunk_key.offset, chunk_end, type,
11773                                 btrfs_block_group_flags(&bg_item));
11774                         err |= REFERENCER_MISSING;
11775                 }
11776         }
11777
11778         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11779         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11780         for (i = 0; i < num_stripes; i++) {
11781                 btrfs_release_path(&path);
11782                 btrfs_init_path(&path);
11783                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11784                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11785                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11786
11787                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11788                                         0, 0);
11789                 if (ret)
11790                         goto not_match_dev;
11791
11792                 leaf = path.nodes[0];
11793                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11794                                      struct btrfs_dev_extent);
11795                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11796                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11797                 if (objectid != chunk_key.objectid ||
11798                     offset != chunk_key.offset ||
11799                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11800                         goto not_match_dev;
11801                 continue;
11802 not_match_dev:
11803                 err |= BACKREF_MISSING;
11804                 error(
11805                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11806                         chunk_key.objectid, chunk_end, i);
11807                 continue;
11808         }
11809         btrfs_release_path(&path);
11810 out:
11811         return err;
11812 }
11813
11814 /*
11815  * Main entry function to check known items and update related accounting info
11816  */
11817 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11818 {
11819         struct btrfs_fs_info *fs_info = root->fs_info;
11820         struct btrfs_key key;
11821         int slot = 0;
11822         int type;
11823         struct btrfs_extent_data_ref *dref;
11824         int ret;
11825         int err = 0;
11826
11827 next:
11828         btrfs_item_key_to_cpu(eb, &key, slot);
11829         type = key.type;
11830
11831         switch (type) {
11832         case BTRFS_EXTENT_DATA_KEY:
11833                 ret = check_extent_data_item(root, eb, slot);
11834                 err |= ret;
11835                 break;
11836         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11837                 ret = check_block_group_item(fs_info, eb, slot);
11838                 err |= ret;
11839                 break;
11840         case BTRFS_DEV_ITEM_KEY:
11841                 ret = check_dev_item(fs_info, eb, slot);
11842                 err |= ret;
11843                 break;
11844         case BTRFS_CHUNK_ITEM_KEY:
11845                 ret = check_chunk_item(fs_info, eb, slot);
11846                 err |= ret;
11847                 break;
11848         case BTRFS_DEV_EXTENT_KEY:
11849                 ret = check_dev_extent_item(fs_info, eb, slot);
11850                 err |= ret;
11851                 break;
11852         case BTRFS_EXTENT_ITEM_KEY:
11853         case BTRFS_METADATA_ITEM_KEY:
11854                 ret = check_extent_item(fs_info, eb, slot);
11855                 err |= ret;
11856                 break;
11857         case BTRFS_EXTENT_CSUM_KEY:
11858                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11859                 break;
11860         case BTRFS_TREE_BLOCK_REF_KEY:
11861                 ret = check_tree_block_backref(fs_info, key.offset,
11862                                                key.objectid, -1);
11863                 err |= ret;
11864                 break;
11865         case BTRFS_EXTENT_DATA_REF_KEY:
11866                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11867                 ret = check_extent_data_backref(fs_info,
11868                                 btrfs_extent_data_ref_root(eb, dref),
11869                                 btrfs_extent_data_ref_objectid(eb, dref),
11870                                 btrfs_extent_data_ref_offset(eb, dref),
11871                                 key.objectid, 0,
11872                                 btrfs_extent_data_ref_count(eb, dref));
11873                 err |= ret;
11874                 break;
11875         case BTRFS_SHARED_BLOCK_REF_KEY:
11876                 ret = check_shared_block_backref(fs_info, key.offset,
11877                                                  key.objectid, -1);
11878                 err |= ret;
11879                 break;
11880         case BTRFS_SHARED_DATA_REF_KEY:
11881                 ret = check_shared_data_backref(fs_info, key.offset,
11882                                                 key.objectid);
11883                 err |= ret;
11884                 break;
11885         default:
11886                 break;
11887         }
11888
11889         if (++slot < btrfs_header_nritems(eb))
11890                 goto next;
11891
11892         return err;
11893 }
11894
11895 /*
11896  * Helper function for later fs/subvol tree check.  To determine if a tree
11897  * block should be checked.
11898  * This function will ensure only the direct referencer with lowest rootid to
11899  * check a fs/subvolume tree block.
11900  *
11901  * Backref check at extent tree would detect errors like missing subvolume
11902  * tree, so we can do aggressive check to reduce duplicated checks.
11903  */
11904 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11905 {
11906         struct btrfs_root *extent_root = root->fs_info->extent_root;
11907         struct btrfs_key key;
11908         struct btrfs_path path;
11909         struct extent_buffer *leaf;
11910         int slot;
11911         struct btrfs_extent_item *ei;
11912         unsigned long ptr;
11913         unsigned long end;
11914         int type;
11915         u32 item_size;
11916         u64 offset;
11917         struct btrfs_extent_inline_ref *iref;
11918         int ret;
11919
11920         btrfs_init_path(&path);
11921         key.objectid = btrfs_header_bytenr(eb);
11922         key.type = BTRFS_METADATA_ITEM_KEY;
11923         key.offset = (u64)-1;
11924
11925         /*
11926          * Any failure in backref resolving means we can't determine
11927          * whom the tree block belongs to.
11928          * So in that case, we need to check that tree block
11929          */
11930         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11931         if (ret < 0)
11932                 goto need_check;
11933
11934         ret = btrfs_previous_extent_item(extent_root, &path,
11935                                          btrfs_header_bytenr(eb));
11936         if (ret)
11937                 goto need_check;
11938
11939         leaf = path.nodes[0];
11940         slot = path.slots[0];
11941         btrfs_item_key_to_cpu(leaf, &key, slot);
11942         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11943
11944         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11945                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11946         } else {
11947                 struct btrfs_tree_block_info *info;
11948
11949                 info = (struct btrfs_tree_block_info *)(ei + 1);
11950                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11951         }
11952
11953         item_size = btrfs_item_size_nr(leaf, slot);
11954         ptr = (unsigned long)iref;
11955         end = (unsigned long)ei + item_size;
11956         while (ptr < end) {
11957                 iref = (struct btrfs_extent_inline_ref *)ptr;
11958                 type = btrfs_extent_inline_ref_type(leaf, iref);
11959                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11960
11961                 /*
11962                  * We only check the tree block if current root is
11963                  * the lowest referencer of it.
11964                  */
11965                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11966                     offset < root->objectid) {
11967                         btrfs_release_path(&path);
11968                         return 0;
11969                 }
11970
11971                 ptr += btrfs_extent_inline_ref_size(type);
11972         }
11973         /*
11974          * Normally we should also check keyed tree block ref, but that may be
11975          * very time consuming.  Inlined ref should already make us skip a lot
11976          * of refs now.  So skip search keyed tree block ref.
11977          */
11978
11979 need_check:
11980         btrfs_release_path(&path);
11981         return 1;
11982 }
11983
11984 /*
11985  * Traversal function for tree block. We will do:
11986  * 1) Skip shared fs/subvolume tree blocks
11987  * 2) Update related bytes accounting
11988  * 3) Pre-order traversal
11989  */
11990 static int traverse_tree_block(struct btrfs_root *root,
11991                                 struct extent_buffer *node)
11992 {
11993         struct extent_buffer *eb;
11994         struct btrfs_key key;
11995         struct btrfs_key drop_key;
11996         int level;
11997         u64 nr;
11998         int i;
11999         int err = 0;
12000         int ret;
12001
12002         /*
12003          * Skip shared fs/subvolume tree block, in that case they will
12004          * be checked by referencer with lowest rootid
12005          */
12006         if (is_fstree(root->objectid) && !should_check(root, node))
12007                 return 0;
12008
12009         /* Update bytes accounting */
12010         total_btree_bytes += node->len;
12011         if (fs_root_objectid(btrfs_header_owner(node)))
12012                 total_fs_tree_bytes += node->len;
12013         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
12014                 total_extent_tree_bytes += node->len;
12015
12016         /* pre-order tranversal, check itself first */
12017         level = btrfs_header_level(node);
12018         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
12019                                    btrfs_header_level(node),
12020                                    btrfs_header_owner(node));
12021         err |= ret;
12022         if (err)
12023                 error(
12024         "check %s failed root %llu bytenr %llu level %d, force continue check",
12025                         level ? "node":"leaf", root->objectid,
12026                         btrfs_header_bytenr(node), btrfs_header_level(node));
12027
12028         if (!level) {
12029                 btree_space_waste += btrfs_leaf_free_space(root, node);
12030                 ret = check_leaf_items(root, node);
12031                 err |= ret;
12032                 return err;
12033         }
12034
12035         nr = btrfs_header_nritems(node);
12036         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
12037         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
12038                 sizeof(struct btrfs_key_ptr);
12039
12040         /* Then check all its children */
12041         for (i = 0; i < nr; i++) {
12042                 u64 blocknr = btrfs_node_blockptr(node, i);
12043
12044                 btrfs_node_key_to_cpu(node, &key, i);
12045                 if (level == root->root_item.drop_level &&
12046                     is_dropped_key(&key, &drop_key))
12047                         continue;
12048
12049                 /*
12050                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
12051                  * to call the function itself.
12052                  */
12053                 eb = read_tree_block(root->fs_info, blocknr, 0);
12054                 if (extent_buffer_uptodate(eb)) {
12055                         ret = traverse_tree_block(root, eb);
12056                         err |= ret;
12057                 }
12058                 free_extent_buffer(eb);
12059         }
12060
12061         return err;
12062 }
12063
12064 /*
12065  * Low memory usage version check_chunks_and_extents.
12066  */
12067 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12068 {
12069         struct btrfs_path path;
12070         struct btrfs_key key;
12071         struct btrfs_root *root1;
12072         struct btrfs_root *root;
12073         struct btrfs_root *cur_root;
12074         int err = 0;
12075         int ret;
12076
12077         root = fs_info->fs_root;
12078
12079         root1 = root->fs_info->chunk_root;
12080         ret = traverse_tree_block(root1, root1->node);
12081         err |= ret;
12082
12083         root1 = root->fs_info->tree_root;
12084         ret = traverse_tree_block(root1, root1->node);
12085         err |= ret;
12086
12087         btrfs_init_path(&path);
12088         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12089         key.offset = 0;
12090         key.type = BTRFS_ROOT_ITEM_KEY;
12091
12092         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12093         if (ret) {
12094                 error("cannot find extent treet in tree_root");
12095                 goto out;
12096         }
12097
12098         while (1) {
12099                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12100                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12101                         goto next;
12102                 key.offset = (u64)-1;
12103
12104                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12105                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12106                                         &key);
12107                 else
12108                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
12109                 if (IS_ERR(cur_root) || !cur_root) {
12110                         error("failed to read tree: %lld", key.objectid);
12111                         goto next;
12112                 }
12113
12114                 ret = traverse_tree_block(cur_root, cur_root->node);
12115                 err |= ret;
12116
12117                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12118                         btrfs_free_fs_root(cur_root);
12119 next:
12120                 ret = btrfs_next_item(root1, &path);
12121                 if (ret)
12122                         goto out;
12123         }
12124
12125 out:
12126         btrfs_release_path(&path);
12127         return err;
12128 }
12129
12130 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12131 {
12132         int ret;
12133
12134         if (!ctx.progress_enabled)
12135                 fprintf(stderr, "checking extents\n");
12136         if (check_mode == CHECK_MODE_LOWMEM)
12137                 ret = check_chunks_and_extents_v2(fs_info);
12138         else
12139                 ret = check_chunks_and_extents(fs_info);
12140
12141         return ret;
12142 }
12143
12144 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12145                            struct btrfs_root *root, int overwrite)
12146 {
12147         struct extent_buffer *c;
12148         struct extent_buffer *old = root->node;
12149         int level;
12150         int ret;
12151         struct btrfs_disk_key disk_key = {0,0,0};
12152
12153         level = 0;
12154
12155         if (overwrite) {
12156                 c = old;
12157                 extent_buffer_get(c);
12158                 goto init;
12159         }
12160         c = btrfs_alloc_free_block(trans, root,
12161                                    root->fs_info->nodesize,
12162                                    root->root_key.objectid,
12163                                    &disk_key, level, 0, 0);
12164         if (IS_ERR(c)) {
12165                 c = old;
12166                 extent_buffer_get(c);
12167                 overwrite = 1;
12168         }
12169 init:
12170         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12171         btrfs_set_header_level(c, level);
12172         btrfs_set_header_bytenr(c, c->start);
12173         btrfs_set_header_generation(c, trans->transid);
12174         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12175         btrfs_set_header_owner(c, root->root_key.objectid);
12176
12177         write_extent_buffer(c, root->fs_info->fsid,
12178                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
12179
12180         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12181                             btrfs_header_chunk_tree_uuid(c),
12182                             BTRFS_UUID_SIZE);
12183
12184         btrfs_mark_buffer_dirty(c);
12185         /*
12186          * this case can happen in the following case:
12187          *
12188          * 1.overwrite previous root.
12189          *
12190          * 2.reinit reloc data root, this is because we skip pin
12191          * down reloc data tree before which means we can allocate
12192          * same block bytenr here.
12193          */
12194         if (old->start == c->start) {
12195                 btrfs_set_root_generation(&root->root_item,
12196                                           trans->transid);
12197                 root->root_item.level = btrfs_header_level(root->node);
12198                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12199                                         &root->root_key, &root->root_item);
12200                 if (ret) {
12201                         free_extent_buffer(c);
12202                         return ret;
12203                 }
12204         }
12205         free_extent_buffer(old);
12206         root->node = c;
12207         add_root_to_dirty_list(root);
12208         return 0;
12209 }
12210
12211 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12212                                 struct extent_buffer *eb, int tree_root)
12213 {
12214         struct extent_buffer *tmp;
12215         struct btrfs_root_item *ri;
12216         struct btrfs_key key;
12217         u64 bytenr;
12218         int level = btrfs_header_level(eb);
12219         int nritems;
12220         int ret;
12221         int i;
12222
12223         /*
12224          * If we have pinned this block before, don't pin it again.
12225          * This can not only avoid forever loop with broken filesystem
12226          * but also give us some speedups.
12227          */
12228         if (test_range_bit(&fs_info->pinned_extents, eb->start,
12229                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12230                 return 0;
12231
12232         btrfs_pin_extent(fs_info, eb->start, eb->len);
12233
12234         nritems = btrfs_header_nritems(eb);
12235         for (i = 0; i < nritems; i++) {
12236                 if (level == 0) {
12237                         btrfs_item_key_to_cpu(eb, &key, i);
12238                         if (key.type != BTRFS_ROOT_ITEM_KEY)
12239                                 continue;
12240                         /* Skip the extent root and reloc roots */
12241                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12242                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12243                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12244                                 continue;
12245                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12246                         bytenr = btrfs_disk_root_bytenr(eb, ri);
12247
12248                         /*
12249                          * If at any point we start needing the real root we
12250                          * will have to build a stump root for the root we are
12251                          * in, but for now this doesn't actually use the root so
12252                          * just pass in extent_root.
12253                          */
12254                         tmp = read_tree_block(fs_info, bytenr, 0);
12255                         if (!extent_buffer_uptodate(tmp)) {
12256                                 fprintf(stderr, "Error reading root block\n");
12257                                 return -EIO;
12258                         }
12259                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
12260                         free_extent_buffer(tmp);
12261                         if (ret)
12262                                 return ret;
12263                 } else {
12264                         bytenr = btrfs_node_blockptr(eb, i);
12265
12266                         /* If we aren't the tree root don't read the block */
12267                         if (level == 1 && !tree_root) {
12268                                 btrfs_pin_extent(fs_info, bytenr,
12269                                                 fs_info->nodesize);
12270                                 continue;
12271                         }
12272
12273                         tmp = read_tree_block(fs_info, bytenr, 0);
12274                         if (!extent_buffer_uptodate(tmp)) {
12275                                 fprintf(stderr, "Error reading tree block\n");
12276                                 return -EIO;
12277                         }
12278                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12279                         free_extent_buffer(tmp);
12280                         if (ret)
12281                                 return ret;
12282                 }
12283         }
12284
12285         return 0;
12286 }
12287
12288 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12289 {
12290         int ret;
12291
12292         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12293         if (ret)
12294                 return ret;
12295
12296         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
12297 }
12298
12299 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12300 {
12301         struct btrfs_block_group_cache *cache;
12302         struct btrfs_path path;
12303         struct extent_buffer *leaf;
12304         struct btrfs_chunk *chunk;
12305         struct btrfs_key key;
12306         int ret;
12307         u64 start;
12308
12309         btrfs_init_path(&path);
12310         key.objectid = 0;
12311         key.type = BTRFS_CHUNK_ITEM_KEY;
12312         key.offset = 0;
12313         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12314         if (ret < 0) {
12315                 btrfs_release_path(&path);
12316                 return ret;
12317         }
12318
12319         /*
12320          * We do this in case the block groups were screwed up and had alloc
12321          * bits that aren't actually set on the chunks.  This happens with
12322          * restored images every time and could happen in real life I guess.
12323          */
12324         fs_info->avail_data_alloc_bits = 0;
12325         fs_info->avail_metadata_alloc_bits = 0;
12326         fs_info->avail_system_alloc_bits = 0;
12327
12328         /* First we need to create the in-memory block groups */
12329         while (1) {
12330                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12331                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12332                         if (ret < 0) {
12333                                 btrfs_release_path(&path);
12334                                 return ret;
12335                         }
12336                         if (ret) {
12337                                 ret = 0;
12338                                 break;
12339                         }
12340                 }
12341                 leaf = path.nodes[0];
12342                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12343                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12344                         path.slots[0]++;
12345                         continue;
12346                 }
12347
12348                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12349                 btrfs_add_block_group(fs_info, 0,
12350                                       btrfs_chunk_type(leaf, chunk),
12351                                       key.objectid, key.offset,
12352                                       btrfs_chunk_length(leaf, chunk));
12353                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12354                                  key.offset + btrfs_chunk_length(leaf, chunk));
12355                 path.slots[0]++;
12356         }
12357         start = 0;
12358         while (1) {
12359                 cache = btrfs_lookup_first_block_group(fs_info, start);
12360                 if (!cache)
12361                         break;
12362                 cache->cached = 1;
12363                 start = cache->key.objectid + cache->key.offset;
12364         }
12365
12366         btrfs_release_path(&path);
12367         return 0;
12368 }
12369
12370 static int reset_balance(struct btrfs_trans_handle *trans,
12371                          struct btrfs_fs_info *fs_info)
12372 {
12373         struct btrfs_root *root = fs_info->tree_root;
12374         struct btrfs_path path;
12375         struct extent_buffer *leaf;
12376         struct btrfs_key key;
12377         int del_slot, del_nr = 0;
12378         int ret;
12379         int found = 0;
12380
12381         btrfs_init_path(&path);
12382         key.objectid = BTRFS_BALANCE_OBJECTID;
12383         key.type = BTRFS_BALANCE_ITEM_KEY;
12384         key.offset = 0;
12385         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12386         if (ret) {
12387                 if (ret > 0)
12388                         ret = 0;
12389                 if (!ret)
12390                         goto reinit_data_reloc;
12391                 else
12392                         goto out;
12393         }
12394
12395         ret = btrfs_del_item(trans, root, &path);
12396         if (ret)
12397                 goto out;
12398         btrfs_release_path(&path);
12399
12400         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12401         key.type = BTRFS_ROOT_ITEM_KEY;
12402         key.offset = 0;
12403         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12404         if (ret < 0)
12405                 goto out;
12406         while (1) {
12407                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12408                         if (!found)
12409                                 break;
12410
12411                         if (del_nr) {
12412                                 ret = btrfs_del_items(trans, root, &path,
12413                                                       del_slot, del_nr);
12414                                 del_nr = 0;
12415                                 if (ret)
12416                                         goto out;
12417                         }
12418                         key.offset++;
12419                         btrfs_release_path(&path);
12420
12421                         found = 0;
12422                         ret = btrfs_search_slot(trans, root, &key, &path,
12423                                                 -1, 1);
12424                         if (ret < 0)
12425                                 goto out;
12426                         continue;
12427                 }
12428                 found = 1;
12429                 leaf = path.nodes[0];
12430                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12431                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12432                         break;
12433                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12434                         path.slots[0]++;
12435                         continue;
12436                 }
12437                 if (!del_nr) {
12438                         del_slot = path.slots[0];
12439                         del_nr = 1;
12440                 } else {
12441                         del_nr++;
12442                 }
12443                 path.slots[0]++;
12444         }
12445
12446         if (del_nr) {
12447                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12448                 if (ret)
12449                         goto out;
12450         }
12451         btrfs_release_path(&path);
12452
12453 reinit_data_reloc:
12454         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12455         key.type = BTRFS_ROOT_ITEM_KEY;
12456         key.offset = (u64)-1;
12457         root = btrfs_read_fs_root(fs_info, &key);
12458         if (IS_ERR(root)) {
12459                 fprintf(stderr, "Error reading data reloc tree\n");
12460                 ret = PTR_ERR(root);
12461                 goto out;
12462         }
12463         record_root_in_trans(trans, root);
12464         ret = btrfs_fsck_reinit_root(trans, root, 0);
12465         if (ret)
12466                 goto out;
12467         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12468 out:
12469         btrfs_release_path(&path);
12470         return ret;
12471 }
12472
12473 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12474                               struct btrfs_fs_info *fs_info)
12475 {
12476         u64 start = 0;
12477         int ret;
12478
12479         /*
12480          * The only reason we don't do this is because right now we're just
12481          * walking the trees we find and pinning down their bytes, we don't look
12482          * at any of the leaves.  In order to do mixed groups we'd have to check
12483          * the leaves of any fs roots and pin down the bytes for any file
12484          * extents we find.  Not hard but why do it if we don't have to?
12485          */
12486         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12487                 fprintf(stderr, "We don't support re-initing the extent tree "
12488                         "for mixed block groups yet, please notify a btrfs "
12489                         "developer you want to do this so they can add this "
12490                         "functionality.\n");
12491                 return -EINVAL;
12492         }
12493
12494         /*
12495          * first we need to walk all of the trees except the extent tree and pin
12496          * down the bytes that are in use so we don't overwrite any existing
12497          * metadata.
12498          */
12499         ret = pin_metadata_blocks(fs_info);
12500         if (ret) {
12501                 fprintf(stderr, "error pinning down used bytes\n");
12502                 return ret;
12503         }
12504
12505         /*
12506          * Need to drop all the block groups since we're going to recreate all
12507          * of them again.
12508          */
12509         btrfs_free_block_groups(fs_info);
12510         ret = reset_block_groups(fs_info);
12511         if (ret) {
12512                 fprintf(stderr, "error resetting the block groups\n");
12513                 return ret;
12514         }
12515
12516         /* Ok we can allocate now, reinit the extent root */
12517         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12518         if (ret) {
12519                 fprintf(stderr, "extent root initialization failed\n");
12520                 /*
12521                  * When the transaction code is updated we should end the
12522                  * transaction, but for now progs only knows about commit so
12523                  * just return an error.
12524                  */
12525                 return ret;
12526         }
12527
12528         /*
12529          * Now we have all the in-memory block groups setup so we can make
12530          * allocations properly, and the metadata we care about is safe since we
12531          * pinned all of it above.
12532          */
12533         while (1) {
12534                 struct btrfs_block_group_cache *cache;
12535
12536                 cache = btrfs_lookup_first_block_group(fs_info, start);
12537                 if (!cache)
12538                         break;
12539                 start = cache->key.objectid + cache->key.offset;
12540                 ret = btrfs_insert_item(trans, fs_info->extent_root,
12541                                         &cache->key, &cache->item,
12542                                         sizeof(cache->item));
12543                 if (ret) {
12544                         fprintf(stderr, "Error adding block group\n");
12545                         return ret;
12546                 }
12547                 btrfs_extent_post_op(trans, fs_info->extent_root);
12548         }
12549
12550         ret = reset_balance(trans, fs_info);
12551         if (ret)
12552                 fprintf(stderr, "error resetting the pending balance\n");
12553
12554         return ret;
12555 }
12556
12557 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12558 {
12559         struct btrfs_path path;
12560         struct btrfs_trans_handle *trans;
12561         struct btrfs_key key;
12562         int ret;
12563
12564         printf("Recowing metadata block %llu\n", eb->start);
12565         key.objectid = btrfs_header_owner(eb);
12566         key.type = BTRFS_ROOT_ITEM_KEY;
12567         key.offset = (u64)-1;
12568
12569         root = btrfs_read_fs_root(root->fs_info, &key);
12570         if (IS_ERR(root)) {
12571                 fprintf(stderr, "Couldn't find owner root %llu\n",
12572                         key.objectid);
12573                 return PTR_ERR(root);
12574         }
12575
12576         trans = btrfs_start_transaction(root, 1);
12577         if (IS_ERR(trans))
12578                 return PTR_ERR(trans);
12579
12580         btrfs_init_path(&path);
12581         path.lowest_level = btrfs_header_level(eb);
12582         if (path.lowest_level)
12583                 btrfs_node_key_to_cpu(eb, &key, 0);
12584         else
12585                 btrfs_item_key_to_cpu(eb, &key, 0);
12586
12587         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12588         btrfs_commit_transaction(trans, root);
12589         btrfs_release_path(&path);
12590         return ret;
12591 }
12592
12593 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12594 {
12595         struct btrfs_path path;
12596         struct btrfs_trans_handle *trans;
12597         struct btrfs_key key;
12598         int ret;
12599
12600         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12601                bad->key.type, bad->key.offset);
12602         key.objectid = bad->root_id;
12603         key.type = BTRFS_ROOT_ITEM_KEY;
12604         key.offset = (u64)-1;
12605
12606         root = btrfs_read_fs_root(root->fs_info, &key);
12607         if (IS_ERR(root)) {
12608                 fprintf(stderr, "Couldn't find owner root %llu\n",
12609                         key.objectid);
12610                 return PTR_ERR(root);
12611         }
12612
12613         trans = btrfs_start_transaction(root, 1);
12614         if (IS_ERR(trans))
12615                 return PTR_ERR(trans);
12616
12617         btrfs_init_path(&path);
12618         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12619         if (ret) {
12620                 if (ret > 0)
12621                         ret = 0;
12622                 goto out;
12623         }
12624         ret = btrfs_del_item(trans, root, &path);
12625 out:
12626         btrfs_commit_transaction(trans, root);
12627         btrfs_release_path(&path);
12628         return ret;
12629 }
12630
12631 static int zero_log_tree(struct btrfs_root *root)
12632 {
12633         struct btrfs_trans_handle *trans;
12634         int ret;
12635
12636         trans = btrfs_start_transaction(root, 1);
12637         if (IS_ERR(trans)) {
12638                 ret = PTR_ERR(trans);
12639                 return ret;
12640         }
12641         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12642         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12643         ret = btrfs_commit_transaction(trans, root);
12644         return ret;
12645 }
12646
12647 static int populate_csum(struct btrfs_trans_handle *trans,
12648                          struct btrfs_root *csum_root, char *buf, u64 start,
12649                          u64 len)
12650 {
12651         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12652         u64 offset = 0;
12653         u64 sectorsize;
12654         int ret = 0;
12655
12656         while (offset < len) {
12657                 sectorsize = fs_info->sectorsize;
12658                 ret = read_extent_data(fs_info, buf, start + offset,
12659                                        &sectorsize, 0);
12660                 if (ret)
12661                         break;
12662                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12663                                             start + offset, buf, sectorsize);
12664                 if (ret)
12665                         break;
12666                 offset += sectorsize;
12667         }
12668         return ret;
12669 }
12670
12671 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12672                                       struct btrfs_root *csum_root,
12673                                       struct btrfs_root *cur_root)
12674 {
12675         struct btrfs_path path;
12676         struct btrfs_key key;
12677         struct extent_buffer *node;
12678         struct btrfs_file_extent_item *fi;
12679         char *buf = NULL;
12680         u64 start = 0;
12681         u64 len = 0;
12682         int slot = 0;
12683         int ret = 0;
12684
12685         buf = malloc(cur_root->fs_info->sectorsize);
12686         if (!buf)
12687                 return -ENOMEM;
12688
12689         btrfs_init_path(&path);
12690         key.objectid = 0;
12691         key.offset = 0;
12692         key.type = 0;
12693         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12694         if (ret < 0)
12695                 goto out;
12696         /* Iterate all regular file extents and fill its csum */
12697         while (1) {
12698                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12699
12700                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12701                         goto next;
12702                 node = path.nodes[0];
12703                 slot = path.slots[0];
12704                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12705                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12706                         goto next;
12707                 start = btrfs_file_extent_disk_bytenr(node, fi);
12708                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12709
12710                 ret = populate_csum(trans, csum_root, buf, start, len);
12711                 if (ret == -EEXIST)
12712                         ret = 0;
12713                 if (ret < 0)
12714                         goto out;
12715 next:
12716                 /*
12717                  * TODO: if next leaf is corrupted, jump to nearest next valid
12718                  * leaf.
12719                  */
12720                 ret = btrfs_next_item(cur_root, &path);
12721                 if (ret < 0)
12722                         goto out;
12723                 if (ret > 0) {
12724                         ret = 0;
12725                         goto out;
12726                 }
12727         }
12728
12729 out:
12730         btrfs_release_path(&path);
12731         free(buf);
12732         return ret;
12733 }
12734
12735 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12736                                   struct btrfs_root *csum_root)
12737 {
12738         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12739         struct btrfs_path path;
12740         struct btrfs_root *tree_root = fs_info->tree_root;
12741         struct btrfs_root *cur_root;
12742         struct extent_buffer *node;
12743         struct btrfs_key key;
12744         int slot = 0;
12745         int ret = 0;
12746
12747         btrfs_init_path(&path);
12748         key.objectid = BTRFS_FS_TREE_OBJECTID;
12749         key.offset = 0;
12750         key.type = BTRFS_ROOT_ITEM_KEY;
12751         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12752         if (ret < 0)
12753                 goto out;
12754         if (ret > 0) {
12755                 ret = -ENOENT;
12756                 goto out;
12757         }
12758
12759         while (1) {
12760                 node = path.nodes[0];
12761                 slot = path.slots[0];
12762                 btrfs_item_key_to_cpu(node, &key, slot);
12763                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12764                         goto out;
12765                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12766                         goto next;
12767                 if (!is_fstree(key.objectid))
12768                         goto next;
12769                 key.offset = (u64)-1;
12770
12771                 cur_root = btrfs_read_fs_root(fs_info, &key);
12772                 if (IS_ERR(cur_root) || !cur_root) {
12773                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12774                                 key.objectid);
12775                         goto out;
12776                 }
12777                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12778                                 cur_root);
12779                 if (ret < 0)
12780                         goto out;
12781 next:
12782                 ret = btrfs_next_item(tree_root, &path);
12783                 if (ret > 0) {
12784                         ret = 0;
12785                         goto out;
12786                 }
12787                 if (ret < 0)
12788                         goto out;
12789         }
12790
12791 out:
12792         btrfs_release_path(&path);
12793         return ret;
12794 }
12795
12796 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12797                                       struct btrfs_root *csum_root)
12798 {
12799         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12800         struct btrfs_path path;
12801         struct btrfs_extent_item *ei;
12802         struct extent_buffer *leaf;
12803         char *buf;
12804         struct btrfs_key key;
12805         int ret;
12806
12807         btrfs_init_path(&path);
12808         key.objectid = 0;
12809         key.type = BTRFS_EXTENT_ITEM_KEY;
12810         key.offset = 0;
12811         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12812         if (ret < 0) {
12813                 btrfs_release_path(&path);
12814                 return ret;
12815         }
12816
12817         buf = malloc(csum_root->fs_info->sectorsize);
12818         if (!buf) {
12819                 btrfs_release_path(&path);
12820                 return -ENOMEM;
12821         }
12822
12823         while (1) {
12824                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12825                         ret = btrfs_next_leaf(extent_root, &path);
12826                         if (ret < 0)
12827                                 break;
12828                         if (ret) {
12829                                 ret = 0;
12830                                 break;
12831                         }
12832                 }
12833                 leaf = path.nodes[0];
12834
12835                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12836                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12837                         path.slots[0]++;
12838                         continue;
12839                 }
12840
12841                 ei = btrfs_item_ptr(leaf, path.slots[0],
12842                                     struct btrfs_extent_item);
12843                 if (!(btrfs_extent_flags(leaf, ei) &
12844                       BTRFS_EXTENT_FLAG_DATA)) {
12845                         path.slots[0]++;
12846                         continue;
12847                 }
12848
12849                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12850                                     key.offset);
12851                 if (ret)
12852                         break;
12853                 path.slots[0]++;
12854         }
12855
12856         btrfs_release_path(&path);
12857         free(buf);
12858         return ret;
12859 }
12860
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * An extent tree init wipes out all the extent info, so in that case the
 * extent tree cannot be relied upon and the fs/subvolume trees must be walked
 * instead.  A non-zero @search_fs_tree selects the fs/subvol tree walk; zero
 * selects the extent tree walk.  The chosen helper's return value is passed
 * through unchanged.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
12877
12878 static void free_roots_info_cache(void)
12879 {
12880         if (!roots_info_cache)
12881                 return;
12882
12883         while (!cache_tree_empty(roots_info_cache)) {
12884                 struct cache_extent *entry;
12885                 struct root_item_info *rii;
12886
12887                 entry = first_cache_extent(roots_info_cache);
12888                 if (!entry)
12889                         break;
12890                 remove_cache_extent(roots_info_cache, entry);
12891                 rii = container_of(entry, struct root_item_info, cache_extent);
12892                 free(rii);
12893         }
12894
12895         free(roots_info_cache);
12896         roots_info_cache = NULL;
12897 }
12898
12899 static int build_roots_info_cache(struct btrfs_fs_info *info)
12900 {
12901         int ret = 0;
12902         struct btrfs_key key;
12903         struct extent_buffer *leaf;
12904         struct btrfs_path path;
12905
12906         if (!roots_info_cache) {
12907                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12908                 if (!roots_info_cache)
12909                         return -ENOMEM;
12910                 cache_tree_init(roots_info_cache);
12911         }
12912
12913         btrfs_init_path(&path);
12914         key.objectid = 0;
12915         key.type = BTRFS_EXTENT_ITEM_KEY;
12916         key.offset = 0;
12917         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12918         if (ret < 0)
12919                 goto out;
12920         leaf = path.nodes[0];
12921
12922         while (1) {
12923                 struct btrfs_key found_key;
12924                 struct btrfs_extent_item *ei;
12925                 struct btrfs_extent_inline_ref *iref;
12926                 int slot = path.slots[0];
12927                 int type;
12928                 u64 flags;
12929                 u64 root_id;
12930                 u8 level;
12931                 struct cache_extent *entry;
12932                 struct root_item_info *rii;
12933
12934                 if (slot >= btrfs_header_nritems(leaf)) {
12935                         ret = btrfs_next_leaf(info->extent_root, &path);
12936                         if (ret < 0) {
12937                                 break;
12938                         } else if (ret) {
12939                                 ret = 0;
12940                                 break;
12941                         }
12942                         leaf = path.nodes[0];
12943                         slot = path.slots[0];
12944                 }
12945
12946                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12947
12948                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12949                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12950                         goto next;
12951
12952                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12953                 flags = btrfs_extent_flags(leaf, ei);
12954
12955                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12956                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12957                         goto next;
12958
12959                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12960                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12961                         level = found_key.offset;
12962                 } else {
12963                         struct btrfs_tree_block_info *binfo;
12964
12965                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12966                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12967                         level = btrfs_tree_block_level(leaf, binfo);
12968                 }
12969
12970                 /*
12971                  * For a root extent, it must be of the following type and the
12972                  * first (and only one) iref in the item.
12973                  */
12974                 type = btrfs_extent_inline_ref_type(leaf, iref);
12975                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12976                         goto next;
12977
12978                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12979                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12980                 if (!entry) {
12981                         rii = malloc(sizeof(struct root_item_info));
12982                         if (!rii) {
12983                                 ret = -ENOMEM;
12984                                 goto out;
12985                         }
12986                         rii->cache_extent.start = root_id;
12987                         rii->cache_extent.size = 1;
12988                         rii->level = (u8)-1;
12989                         entry = &rii->cache_extent;
12990                         ret = insert_cache_extent(roots_info_cache, entry);
12991                         ASSERT(ret == 0);
12992                 } else {
12993                         rii = container_of(entry, struct root_item_info,
12994                                            cache_extent);
12995                 }
12996
12997                 ASSERT(rii->cache_extent.start == root_id);
12998                 ASSERT(rii->cache_extent.size == 1);
12999
13000                 if (level > rii->level || rii->level == (u8)-1) {
13001                         rii->level = level;
13002                         rii->bytenr = found_key.objectid;
13003                         rii->gen = btrfs_extent_generation(leaf, ei);
13004                         rii->node_count = 1;
13005                 } else if (level == rii->level) {
13006                         rii->node_count++;
13007                 }
13008 next:
13009                 path.slots[0]++;
13010         }
13011
13012 out:
13013         btrfs_release_path(&path);
13014
13015         return ret;
13016 }
13017
13018 static int maybe_repair_root_item(struct btrfs_path *path,
13019                                   const struct btrfs_key *root_key,
13020                                   const int read_only_mode)
13021 {
13022         const u64 root_id = root_key->objectid;
13023         struct cache_extent *entry;
13024         struct root_item_info *rii;
13025         struct btrfs_root_item ri;
13026         unsigned long offset;
13027
13028         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13029         if (!entry) {
13030                 fprintf(stderr,
13031                         "Error: could not find extent items for root %llu\n",
13032                         root_key->objectid);
13033                 return -ENOENT;
13034         }
13035
13036         rii = container_of(entry, struct root_item_info, cache_extent);
13037         ASSERT(rii->cache_extent.start == root_id);
13038         ASSERT(rii->cache_extent.size == 1);
13039
13040         if (rii->node_count != 1) {
13041                 fprintf(stderr,
13042                         "Error: could not find btree root extent for root %llu\n",
13043                         root_id);
13044                 return -ENOENT;
13045         }
13046
13047         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13048         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13049
13050         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13051             btrfs_root_level(&ri) != rii->level ||
13052             btrfs_root_generation(&ri) != rii->gen) {
13053
13054                 /*
13055                  * If we're in repair mode but our caller told us to not update
13056                  * the root item, i.e. just check if it needs to be updated, don't
13057                  * print this message, since the caller will call us again shortly
13058                  * for the same root item without read only mode (the caller will
13059                  * open a transaction first).
13060                  */
13061                 if (!(read_only_mode && repair))
13062                         fprintf(stderr,
13063                                 "%sroot item for root %llu,"
13064                                 " current bytenr %llu, current gen %llu, current level %u,"
13065                                 " new bytenr %llu, new gen %llu, new level %u\n",
13066                                 (read_only_mode ? "" : "fixing "),
13067                                 root_id,
13068                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13069                                 btrfs_root_level(&ri),
13070                                 rii->bytenr, rii->gen, rii->level);
13071
13072                 if (btrfs_root_generation(&ri) > rii->gen) {
13073                         fprintf(stderr,
13074                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13075                                 root_id, btrfs_root_generation(&ri), rii->gen);
13076                         return -EINVAL;
13077                 }
13078
13079                 if (!read_only_mode) {
13080                         btrfs_set_root_bytenr(&ri, rii->bytenr);
13081                         btrfs_set_root_level(&ri, rii->level);
13082                         btrfs_set_root_generation(&ri, rii->gen);
13083                         write_extent_buffer(path->nodes[0], &ri,
13084                                             offset, sizeof(ri));
13085                 }
13086
13087                 return 1;
13088         }
13089
13090         return 0;
13091 }
13092
13093 /*
13094  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13095  * caused read-only snapshots to be corrupted if they were created at a moment
13096  * when the source subvolume/snapshot had orphan items. The issue was that the
13097  * on-disk root items became incorrect, referring to the pre orphan cleanup root
13098  * node instead of the post orphan cleanup root node.
13099  * So this function, and its callees, just detects and fixes those cases. Even
13100  * though the regression was for read-only snapshots, this function applies to
13101  * any snapshot/subvolume root.
13102  * This must be run before any other repair code - not doing it so, makes other
13103  * repair code delete or modify backrefs in the extent tree for example, which
13104  * will result in an inconsistent fs after repairing the root items.
13105  */
13106 static int repair_root_items(struct btrfs_fs_info *info)
13107 {
13108         struct btrfs_path path;
13109         struct btrfs_key key;
13110         struct extent_buffer *leaf;
13111         struct btrfs_trans_handle *trans = NULL;
13112         int ret = 0;
13113         int bad_roots = 0;
13114         int need_trans = 0;
13115
13116         btrfs_init_path(&path);
13117
13118         ret = build_roots_info_cache(info);
13119         if (ret)
13120                 goto out;
13121
13122         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13123         key.type = BTRFS_ROOT_ITEM_KEY;
13124         key.offset = 0;
13125
13126 again:
13127         /*
13128          * Avoid opening and committing transactions if a leaf doesn't have
13129          * any root items that need to be fixed, so that we avoid rotating
13130          * backup roots unnecessarily.
13131          */
13132         if (need_trans) {
13133                 trans = btrfs_start_transaction(info->tree_root, 1);
13134                 if (IS_ERR(trans)) {
13135                         ret = PTR_ERR(trans);
13136                         goto out;
13137                 }
13138         }
13139
13140         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13141                                 0, trans ? 1 : 0);
13142         if (ret < 0)
13143                 goto out;
13144         leaf = path.nodes[0];
13145
13146         while (1) {
13147                 struct btrfs_key found_key;
13148
13149                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13150                         int no_more_keys = find_next_key(&path, &key);
13151
13152                         btrfs_release_path(&path);
13153                         if (trans) {
13154                                 ret = btrfs_commit_transaction(trans,
13155                                                                info->tree_root);
13156                                 trans = NULL;
13157                                 if (ret < 0)
13158                                         goto out;
13159                         }
13160                         need_trans = 0;
13161                         if (no_more_keys)
13162                                 break;
13163                         goto again;
13164                 }
13165
13166                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13167
13168                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13169                         goto next;
13170                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13171                         goto next;
13172
13173                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13174                 if (ret < 0)
13175                         goto out;
13176                 if (ret) {
13177                         if (!trans && repair) {
13178                                 need_trans = 1;
13179                                 key = found_key;
13180                                 btrfs_release_path(&path);
13181                                 goto again;
13182                         }
13183                         bad_roots++;
13184                 }
13185 next:
13186                 path.slots[0]++;
13187         }
13188         ret = 0;
13189 out:
13190         free_roots_info_cache();
13191         btrfs_release_path(&path);
13192         if (trans)
13193                 btrfs_commit_transaction(trans, info->tree_root);
13194         if (ret < 0)
13195                 return ret;
13196
13197         return bad_roots;
13198 }
13199
13200 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13201 {
13202         struct btrfs_trans_handle *trans;
13203         struct btrfs_block_group_cache *bg_cache;
13204         u64 current = 0;
13205         int ret = 0;
13206
13207         /* Clear all free space cache inodes and its extent data */
13208         while (1) {
13209                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13210                 if (!bg_cache)
13211                         break;
13212                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13213                 if (ret < 0)
13214                         return ret;
13215                 current = bg_cache->key.objectid + bg_cache->key.offset;
13216         }
13217
13218         /* Don't forget to set cache_generation to -1 */
13219         trans = btrfs_start_transaction(fs_info->tree_root, 0);
13220         if (IS_ERR(trans)) {
13221                 error("failed to update super block cache generation");
13222                 return PTR_ERR(trans);
13223         }
13224         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13225         btrfs_commit_transaction(trans, fs_info->tree_root);
13226
13227         return ret;
13228 }
13229
13230 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13231                 int clear_version)
13232 {
13233         int ret = 0;
13234
13235         if (clear_version == 1) {
13236                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13237                         error(
13238                 "free space cache v2 detected, use --clear-space-cache v2");
13239                         ret = 1;
13240                         goto close_out;
13241                 }
13242                 printf("Clearing free space cache\n");
13243                 ret = clear_free_space_cache(fs_info);
13244                 if (ret) {
13245                         error("failed to clear free space cache");
13246                         ret = 1;
13247                 } else {
13248                         printf("Free space cache cleared\n");
13249                 }
13250         } else if (clear_version == 2) {
13251                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13252                         printf("no free space cache v2 to clear\n");
13253                         ret = 0;
13254                         goto close_out;
13255                 }
13256                 printf("Clear free space cache v2\n");
13257                 ret = btrfs_clear_free_space_tree(fs_info);
13258                 if (ret) {
13259                         error("failed to clear free space cache v2: %d", ret);
13260                         ret = 1;
13261                 } else {
13262                         printf("free space cache v2 cleared\n");
13263                 }
13264         }
13265 close_out:
13266         return ret;
13267 }
13268
/*
 * Help text for "btrfs check", handed to usage() by cmd_check().
 *
 * NULL-terminated list of lines: the synopsis, a one-line summary, a longer
 * description, an empty separator string, then one or more lines per option.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
13300
13301 int cmd_check(int argc, char **argv)
13302 {
13303         struct cache_tree root_cache;
13304         struct btrfs_root *root;
13305         struct btrfs_fs_info *info;
13306         u64 bytenr = 0;
13307         u64 subvolid = 0;
13308         u64 tree_root_bytenr = 0;
13309         u64 chunk_root_bytenr = 0;
13310         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13311         int ret = 0;
13312         int err = 0;
13313         u64 num;
13314         int init_csum_tree = 0;
13315         int readonly = 0;
13316         int clear_space_cache = 0;
13317         int qgroup_report = 0;
13318         int qgroups_repaired = 0;
13319         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13320         int force = 0;
13321
13322         while(1) {
13323                 int c;
13324                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13325                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13326                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13327                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13328                         GETOPT_VAL_FORCE };
13329                 static const struct option long_options[] = {
13330                         { "super", required_argument, NULL, 's' },
13331                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13332                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13333                         { "init-csum-tree", no_argument, NULL,
13334                                 GETOPT_VAL_INIT_CSUM },
13335                         { "init-extent-tree", no_argument, NULL,
13336                                 GETOPT_VAL_INIT_EXTENT },
13337                         { "check-data-csum", no_argument, NULL,
13338                                 GETOPT_VAL_CHECK_CSUM },
13339                         { "backup", no_argument, NULL, 'b' },
13340                         { "subvol-extents", required_argument, NULL, 'E' },
13341                         { "qgroup-report", no_argument, NULL, 'Q' },
13342                         { "tree-root", required_argument, NULL, 'r' },
13343                         { "chunk-root", required_argument, NULL,
13344                                 GETOPT_VAL_CHUNK_TREE },
13345                         { "progress", no_argument, NULL, 'p' },
13346                         { "mode", required_argument, NULL,
13347                                 GETOPT_VAL_MODE },
13348                         { "clear-space-cache", required_argument, NULL,
13349                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
13350                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13351                         { NULL, 0, NULL, 0}
13352                 };
13353
13354                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13355                 if (c < 0)
13356                         break;
13357                 switch(c) {
13358                         case 'a': /* ignored */ break;
13359                         case 'b':
13360                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13361                                 break;
13362                         case 's':
13363                                 num = arg_strtou64(optarg);
13364                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13365                                         error(
13366                                         "super mirror should be less than %d",
13367                                                 BTRFS_SUPER_MIRROR_MAX);
13368                                         exit(1);
13369                                 }
13370                                 bytenr = btrfs_sb_offset(((int)num));
13371                                 printf("using SB copy %llu, bytenr %llu\n", num,
13372                                        (unsigned long long)bytenr);
13373                                 break;
13374                         case 'Q':
13375                                 qgroup_report = 1;
13376                                 break;
13377                         case 'E':
13378                                 subvolid = arg_strtou64(optarg);
13379                                 break;
13380                         case 'r':
13381                                 tree_root_bytenr = arg_strtou64(optarg);
13382                                 break;
13383                         case GETOPT_VAL_CHUNK_TREE:
13384                                 chunk_root_bytenr = arg_strtou64(optarg);
13385                                 break;
13386                         case 'p':
13387                                 ctx.progress_enabled = true;
13388                                 break;
13389                         case '?':
13390                         case 'h':
13391                                 usage(cmd_check_usage);
13392                         case GETOPT_VAL_REPAIR:
13393                                 printf("enabling repair mode\n");
13394                                 repair = 1;
13395                                 ctree_flags |= OPEN_CTREE_WRITES;
13396                                 break;
13397                         case GETOPT_VAL_READONLY:
13398                                 readonly = 1;
13399                                 break;
13400                         case GETOPT_VAL_INIT_CSUM:
13401                                 printf("Creating a new CRC tree\n");
13402                                 init_csum_tree = 1;
13403                                 repair = 1;
13404                                 ctree_flags |= OPEN_CTREE_WRITES;
13405                                 break;
13406                         case GETOPT_VAL_INIT_EXTENT:
13407                                 init_extent_tree = 1;
13408                                 ctree_flags |= (OPEN_CTREE_WRITES |
13409                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
13410                                 repair = 1;
13411                                 break;
13412                         case GETOPT_VAL_CHECK_CSUM:
13413                                 check_data_csum = 1;
13414                                 break;
13415                         case GETOPT_VAL_MODE:
13416                                 check_mode = parse_check_mode(optarg);
13417                                 if (check_mode == CHECK_MODE_UNKNOWN) {
13418                                         error("unknown mode: %s", optarg);
13419                                         exit(1);
13420                                 }
13421                                 break;
13422                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
13423                                 if (strcmp(optarg, "v1") == 0) {
13424                                         clear_space_cache = 1;
13425                                 } else if (strcmp(optarg, "v2") == 0) {
13426                                         clear_space_cache = 2;
13427                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13428                                 } else {
13429                                         error(
13430                 "invalid argument to --clear-space-cache, must be v1 or v2");
13431                                         exit(1);
13432                                 }
13433                                 ctree_flags |= OPEN_CTREE_WRITES;
13434                                 break;
13435                         case GETOPT_VAL_FORCE:
13436                                 force = 1;
13437                                 break;
13438                 }
13439         }
13440
13441         if (check_argc_exact(argc - optind, 1))
13442                 usage(cmd_check_usage);
13443
13444         if (ctx.progress_enabled) {
13445                 ctx.tp = TASK_NOTHING;
13446                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13447         }
13448
13449         /* This check is the only reason for --readonly to exist */
13450         if (readonly && repair) {
13451                 error("repair options are not compatible with --readonly");
13452                 exit(1);
13453         }
13454
13455         /*
13456          * experimental and dangerous
13457          */
13458         if (repair && check_mode == CHECK_MODE_LOWMEM)
13459                 warning("low-memory mode repair support is only partial");
13460
13461         radix_tree_init();
13462         cache_tree_init(&root_cache);
13463
13464         ret = check_mounted(argv[optind]);
13465         if (!force) {
13466                 if (ret < 0) {
13467                         error("could not check mount status: %s",
13468                                         strerror(-ret));
13469                         err |= !!ret;
13470                         goto err_out;
13471                 } else if (ret) {
13472                         error(
13473 "%s is currently mounted, use --force if you really intend to check the filesystem",
13474                                 argv[optind]);
13475                         ret = -EBUSY;
13476                         err |= !!ret;
13477                         goto err_out;
13478                 }
13479         } else {
13480                 if (repair) {
13481                         error("repair and --force is not yet supported");
13482                         ret = 1;
13483                         err |= !!ret;
13484                         goto err_out;
13485                 }
13486                 if (ret < 0) {
13487                         warning(
13488 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13489                                 argv[optind]);
13490                 } else if (ret) {
13491                         warning(
13492                         "filesystem mounted, continuing because of --force");
13493                 }
13494                 /* A block device is mounted in exclusive mode by kernel */
13495                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13496         }
13497
13498         /* only allow partial opening under repair mode */
13499         if (repair)
13500                 ctree_flags |= OPEN_CTREE_PARTIAL;
13501
13502         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13503                                   chunk_root_bytenr, ctree_flags);
13504         if (!info) {
13505                 error("cannot open file system");
13506                 ret = -EIO;
13507                 err |= !!ret;
13508                 goto err_out;
13509         }
13510
13511         global_info = info;
13512         root = info->fs_root;
13513         uuid_unparse(info->super_copy->fsid, uuidbuf);
13514
13515         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13516
13517         /*
13518          * Check the bare minimum before starting anything else that could rely
13519          * on it, namely the tree roots, any local consistency checks
13520          */
13521         if (!extent_buffer_uptodate(info->tree_root->node) ||
13522             !extent_buffer_uptodate(info->dev_root->node) ||
13523             !extent_buffer_uptodate(info->chunk_root->node)) {
13524                 error("critical roots corrupted, unable to check the filesystem");
13525                 err |= !!ret;
13526                 ret = -EIO;
13527                 goto close_out;
13528         }
13529
13530         if (clear_space_cache) {
13531                 ret = do_clear_free_space_cache(info, clear_space_cache);
13532                 err |= !!ret;
13533                 goto close_out;
13534         }
13535
13536         /*
13537          * repair mode will force us to commit transaction which
13538          * will make us fail to load log tree when mounting.
13539          */
13540         if (repair && btrfs_super_log_root(info->super_copy)) {
13541                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13542                 if (!ret) {
13543                         ret = 1;
13544                         err |= !!ret;
13545                         goto close_out;
13546                 }
13547                 ret = zero_log_tree(root);
13548                 err |= !!ret;
13549                 if (ret) {
13550                         error("failed to zero log tree: %d", ret);
13551                         goto close_out;
13552                 }
13553         }
13554
13555         if (qgroup_report) {
13556                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13557                        uuidbuf);
13558                 ret = qgroup_verify_all(info);
13559                 err |= !!ret;
13560                 if (ret == 0)
13561                         report_qgroups(1);
13562                 goto close_out;
13563         }
13564         if (subvolid) {
13565                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13566                        subvolid, argv[optind], uuidbuf);
13567                 ret = print_extent_state(info, subvolid);
13568                 err |= !!ret;
13569                 goto close_out;
13570         }
13571
13572         if (init_extent_tree || init_csum_tree) {
13573                 struct btrfs_trans_handle *trans;
13574
13575                 trans = btrfs_start_transaction(info->extent_root, 0);
13576                 if (IS_ERR(trans)) {
13577                         error("error starting transaction");
13578                         ret = PTR_ERR(trans);
13579                         err |= !!ret;
13580                         goto close_out;
13581                 }
13582
13583                 if (init_extent_tree) {
13584                         printf("Creating a new extent tree\n");
13585                         ret = reinit_extent_tree(trans, info);
13586                         err |= !!ret;
13587                         if (ret)
13588                                 goto close_out;
13589                 }
13590
13591                 if (init_csum_tree) {
13592                         printf("Reinitialize checksum tree\n");
13593                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13594                         if (ret) {
13595                                 error("checksum tree initialization failed: %d",
13596                                                 ret);
13597                                 ret = -EIO;
13598                                 err |= !!ret;
13599                                 goto close_out;
13600                         }
13601
13602                         ret = fill_csum_tree(trans, info->csum_root,
13603                                              init_extent_tree);
13604                         err |= !!ret;
13605                         if (ret) {
13606                                 error("checksum tree refilling failed: %d", ret);
13607                                 return -EIO;
13608                         }
13609                 }
13610                 /*
13611                  * Ok now we commit and run the normal fsck, which will add
13612                  * extent entries for all of the items it finds.
13613                  */
13614                 ret = btrfs_commit_transaction(trans, info->extent_root);
13615                 err |= !!ret;
13616                 if (ret)
13617                         goto close_out;
13618         }
13619         if (!extent_buffer_uptodate(info->extent_root->node)) {
13620                 error("critical: extent_root, unable to check the filesystem");
13621                 ret = -EIO;
13622                 err |= !!ret;
13623                 goto close_out;
13624         }
13625         if (!extent_buffer_uptodate(info->csum_root->node)) {
13626                 error("critical: csum_root, unable to check the filesystem");
13627                 ret = -EIO;
13628                 err |= !!ret;
13629                 goto close_out;
13630         }
13631
13632         ret = do_check_chunks_and_extents(info);
13633         err |= !!ret;
13634         if (ret)
13635                 error(
13636                 "errors found in extent allocation tree or chunk allocation");
13637
13638         ret = repair_root_items(info);
13639         err |= !!ret;
13640         if (ret < 0) {
13641                 error("failed to repair root items: %s", strerror(-ret));
13642                 goto close_out;
13643         }
13644         if (repair) {
13645                 fprintf(stderr, "Fixed %d roots.\n", ret);
13646                 ret = 0;
13647         } else if (ret > 0) {
13648                 fprintf(stderr,
13649                        "Found %d roots with an outdated root item.\n",
13650                        ret);
13651                 fprintf(stderr,
13652                         "Please run a filesystem check with the option --repair to fix them.\n");
13653                 ret = 1;
13654                 err |= !!ret;
13655                 goto close_out;
13656         }
13657
13658         if (!ctx.progress_enabled) {
13659                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13660                         fprintf(stderr, "checking free space tree\n");
13661                 else
13662                         fprintf(stderr, "checking free space cache\n");
13663         }
13664         ret = check_space_cache(root);
13665         err |= !!ret;
13666         if (ret) {
13667                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13668                         error("errors found in free space tree");
13669                 else
13670                         error("errors found in free space cache");
13671                 goto out;
13672         }
13673
13674         /*
13675          * We used to have to have these hole extents in between our real
13676          * extents so if we don't have this flag set we need to make sure there
13677          * are no gaps in the file extents for inodes, otherwise we can just
13678          * ignore it when this happens.
13679          */
13680         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13681         ret = do_check_fs_roots(info, &root_cache);
13682         err |= !!ret;
13683         if (ret) {
13684                 error("errors found in fs roots");
13685                 goto out;
13686         }
13687
13688         fprintf(stderr, "checking csums\n");
13689         ret = check_csums(root);
13690         err |= !!ret;
13691         if (ret) {
13692                 error("errors found in csum tree");
13693                 goto out;
13694         }
13695
13696         fprintf(stderr, "checking root refs\n");
13697         /* For low memory mode, check_fs_roots_v2 handles root refs */
13698         if (check_mode != CHECK_MODE_LOWMEM) {
13699                 ret = check_root_refs(root, &root_cache);
13700                 err |= !!ret;
13701                 if (ret) {
13702                         error("errors found in root refs");
13703                         goto out;
13704                 }
13705         }
13706
13707         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13708                 struct extent_buffer *eb;
13709
13710                 eb = list_first_entry(&root->fs_info->recow_ebs,
13711                                       struct extent_buffer, recow);
13712                 list_del_init(&eb->recow);
13713                 ret = recow_extent_buffer(root, eb);
13714                 err |= !!ret;
13715                 if (ret) {
13716                         error("fails to fix transid errors");
13717                         break;
13718                 }
13719         }
13720
13721         while (!list_empty(&delete_items)) {
13722                 struct bad_item *bad;
13723
13724                 bad = list_first_entry(&delete_items, struct bad_item, list);
13725                 list_del_init(&bad->list);
13726                 if (repair) {
13727                         ret = delete_bad_item(root, bad);
13728                         err |= !!ret;
13729                 }
13730                 free(bad);
13731         }
13732
13733         if (info->quota_enabled) {
13734                 fprintf(stderr, "checking quota groups\n");
13735                 ret = qgroup_verify_all(info);
13736                 err |= !!ret;
13737                 if (ret) {
13738                         error("failed to check quota groups");
13739                         goto out;
13740                 }
13741                 report_qgroups(0);
13742                 ret = repair_qgroups(info, &qgroups_repaired);
13743                 err |= !!ret;
13744                 if (err) {
13745                         error("failed to repair quota groups");
13746                         goto out;
13747                 }
13748                 ret = 0;
13749         }
13750
13751         if (!list_empty(&root->fs_info->recow_ebs)) {
13752                 error("transid errors in file system");
13753                 ret = 1;
13754                 err |= !!ret;
13755         }
13756 out:
13757         printf("found %llu bytes used, ",
13758                (unsigned long long)bytes_used);
13759         if (err)
13760                 printf("error(s) found\n");
13761         else
13762                 printf("no error found\n");
13763         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13764         printf("total tree bytes: %llu\n",
13765                (unsigned long long)total_btree_bytes);
13766         printf("total fs tree bytes: %llu\n",
13767                (unsigned long long)total_fs_tree_bytes);
13768         printf("total extent tree bytes: %llu\n",
13769                (unsigned long long)total_extent_tree_bytes);
13770         printf("btree space waste bytes: %llu\n",
13771                (unsigned long long)btree_space_waste);
13772         printf("file data blocks allocated: %llu\n referenced %llu\n",
13773                 (unsigned long long)data_bytes_allocated,
13774                 (unsigned long long)data_bytes_referenced);
13775
13776         free_qgroup_counts();
13777         free_root_recs_tree(&root_cache);
13778 close_out:
13779         close_ctree(root);
13780 err_out:
13781         if (ctx.progress_enabled)
13782                 task_deinit(ctx.info);
13783
13784         return err;
13785 }