btrfs-progs: check: introduce print_inode_ref()
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases reported by the progress task.  The value is used as an index
 * into task_position_string[] in print_status_check().
 */
47 enum task_position {
48         TASK_EXTENTS,
49         TASK_FREE_SPACE,
50         TASK_FS_ROOTS,
51         TASK_NOTHING, /* have to be the last element */
52 };
53
/* Context handed to the progress callback print_status_check(). */
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp; /* current phase, selects the status text */
57
58         struct task_info *info; /* passed to task_period_start()/task_period_wait() */
59 };
60
/*
 * File-scope checker state.  Everything starts zeroed or empty; the code
 * that updates these values is outside this part of the file.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 }; /* progress context, see struct task_ctx */
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Checker implementation to use.  parse_check_mode() maps a mode string to
 * one of these and returns CHECK_MODE_UNKNOWN for anything it does not
 * recognize.
 */
78 enum btrfs_check_mode {
79         CHECK_MODE_ORIGINAL,
80         CHECK_MODE_LOWMEM,
81         CHECK_MODE_UNKNOWN,
82         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL /* alias, not a separate mode */
83 };
84
/* Mode in effect; initialized to the default (original) mode. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Header shared by both backref flavours: struct data_backref and
 * struct tree_backref each embed one as their first member, "node".
 */
87 struct extent_backref {
88         struct rb_node node; /* rb-tree linkage, see rb_node_to_extent_backref() */
89         unsigned int is_data:1; /* set for data backrefs, clear for tree backrefs */
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1; /* compared before the type-specific keys */
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
/* Data-extent back reference; ordering is defined by compare_data_backref(). */
101 struct data_backref {
102         struct extent_backref node; /* common header, must stay usable via container_of() */
103         union { /* parent and root share storage */
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref; /* u32 field, distinct from the 1-bit node.found_ref */
114 };
115
/*
 * Error flag bits; each value is a distinct single bit so several can be
 * OR-ed into one mask.  Bit 0 is not used by this list.
 */
116 #define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM           (1<<14) /* no inode_item */
130 #define LAST_ITEM               (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
133 #define DIR_INDEX_MISSING       (1<<18) /* INODE_INDEX not found */
134 #define DIR_INDEX_MISMATCH      (1<<19) /* INODE_INDEX found but not match */
135
136 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 {
138         return container_of(back, struct data_backref, node);
139 }
140
141 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
142 {
143         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
144         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
145         struct data_backref *back1 = to_data_backref(ext1);
146         struct data_backref *back2 = to_data_backref(ext2);
147
148         WARN_ON(!ext1->is_data);
149         WARN_ON(!ext2->is_data);
150
151         /* parent and root are a union, so this covers both */
152         if (back1->parent > back2->parent)
153                 return 1;
154         if (back1->parent < back2->parent)
155                 return -1;
156
157         /* This is a full backref and the parents match. */
158         if (back1->node.full_backref)
159                 return 0;
160
161         if (back1->owner > back2->owner)
162                 return 1;
163         if (back1->owner < back2->owner)
164                 return -1;
165
166         if (back1->offset > back2->offset)
167                 return 1;
168         if (back1->offset < back2->offset)
169                 return -1;
170
171         if (back1->found_ref && back2->found_ref) {
172                 if (back1->disk_bytenr > back2->disk_bytenr)
173                         return 1;
174                 if (back1->disk_bytenr < back2->disk_bytenr)
175                         return -1;
176
177                 if (back1->bytes > back2->bytes)
178                         return 1;
179                 if (back1->bytes < back2->bytes)
180                         return -1;
181         }
182
183         return 0;
184 }
185
186 /*
187  * Much like data_backref, but with the undetermined members removed
188  * and linked through a list_head instead.
189  * During extent scan, it is stored in root->orphan_data_extent.
190  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
191  */
192 struct orphan_data_extent {
193         struct list_head list; /* linkage on an orphan extent list */
194         u64 root;
195         u64 objectid; /* reported as "inode" by print_orphan_data_extents() */
196         u64 offset;
197         u64 disk_bytenr;
198         u64 disk_len;
199 };
200
/* Tree-block back reference; ordering is defined by compare_tree_backref(). */
201 struct tree_backref {
202         struct extent_backref node; /* common header */
203         union { /* parent and root share storage */
204                 u64 parent;
205                 u64 root;
206         };
207 };
208
209 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
210 {
211         return container_of(back, struct tree_backref, node);
212 }
213
214 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
215 {
216         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
217         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
218         struct tree_backref *back1 = to_tree_backref(ext1);
219         struct tree_backref *back2 = to_tree_backref(ext2);
220
221         WARN_ON(ext1->is_data);
222         WARN_ON(ext2->is_data);
223
224         /* parent and root are a union, so this covers both */
225         if (back1->parent > back2->parent)
226                 return 1;
227         if (back1->parent < back2->parent)
228                 return -1;
229
230         return 0;
231 }
232
233 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
234 {
235         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
236         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
237
238         if (ext1->is_data > ext2->is_data)
239                 return 1;
240
241         if (ext1->is_data < ext2->is_data)
242                 return -1;
243
244         if (ext1->full_backref > ext2->full_backref)
245                 return 1;
246         if (ext1->full_backref < ext2->full_backref)
247                 return -1;
248
249         if (ext1->is_data)
250                 return compare_data_backref(node1, node2);
251         else
252                 return compare_tree_backref(node1, node2);
253 }
254
255 /*
 * Explicit initialization value for extent_record::flag_block_full_backref.
 * That field is 2 bits wide, so it can hold the value 2.
 */
256 enum { FLAG_UNSET = 2 };
257
/*
 * Bookkeeping for one extent.  Only the structural facts visible in this
 * part of the file are annotated; the code filling it in is elsewhere.
 */
258 struct extent_record {
259         struct list_head backrefs;
260         struct list_head dups;
261         struct rb_root backref_tree; /* presumably ordered by compare_extent_backref() - confirm */
262         struct list_head list; /* linkage used by to_extent_record() */
263         struct cache_extent cache;
264         struct btrfs_disk_key parent_key;
265         u64 start;
266         u64 max_size;
267         u64 nr;
268         u64 refs;
269         u64 extent_item_refs;
270         u64 generation;
271         u64 parent_generation;
272         u64 info_objectid;
273         u32 num_duplicates;
274         u8 info_level;
275         unsigned int flag_block_full_backref:2; /* 2 bits so it can hold FLAG_UNSET */
276         unsigned int found_rec:1;
277         unsigned int content_checked:1;
278         unsigned int owner_ref_checked:1;
279         unsigned int is_root:1;
280         unsigned int metadata:1;
281         unsigned int bad_full_backref:1;
282         unsigned int crossing_stripes:1;
283         unsigned int wrong_chunk_type:1;
284 };
285
286 static inline struct extent_record* to_extent_record(struct list_head *entry)
287 {
288         return container_of(entry, struct extent_record, list);
289 }
290
291 struct inode_backref {
292         struct list_head list;
293         unsigned int found_dir_item:1;
294         unsigned int found_dir_index:1;
295         unsigned int found_inode_ref:1;
296         u8 filetype;
297         u8 ref_type;
298         int errors;
299         u64 dir;
300         u64 index;
301         u16 namelen;
302         char name[0];
303 };
304
305 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
306 {
307         return list_entry(entry, struct inode_backref, list);
308 }
309
/*
 * Snapshot of values describing one root item.
 * NOTE(review): field meanings follow the names only; users are outside
 * this part of the file - confirm before relying on them.
 */
310 struct root_item_record {
311         struct list_head list;
312         u64 objectid;
313         u64 bytenr;
314         u64 last_snapshot;
315         u8 level;
316         u8 drop_level;
317         struct btrfs_key drop_key;
318 };
319
/* Per-reference error bits; print_ref_error() turns them into text. */
320 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
321 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
322 #define REF_ERR_NO_INODE_REF            (1 << 2)
323 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
324 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
325 #define REF_ERR_DUP_INODE_REF           (1 << 5)
326 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
327 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
328 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
329 #define REF_ERR_NO_ROOT_REF             (1 << 9)
330 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
331 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
332 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
333
/*
 * One hole [start, start + len) in a file's extents, kept in an rb-tree
 * (see add_file_extent_hole() and del_file_extent_hole()).
 */
334 struct file_extent_hole {
335         struct rb_node node; /* rb-tree linkage, ordered by compare_hole() */
336         u64 start;
337         u64 len;
338 };
339
/*
 * Everything collected about one inode.  clone_inode_rec() shows which
 * members own further allocations: backrefs, orphan_extents and holes.
 */
340 struct inode_record {
341         struct list_head backrefs; /* list of struct inode_backref */
342         unsigned int checked:1;
343         unsigned int merging:1;
344         unsigned int found_inode_item:1;
345         unsigned int found_dir_item:1;
346         unsigned int found_file_extent:1;
347         unsigned int found_csum_item:1;
348         unsigned int some_csum_missing:1;
349         unsigned int nodatasum:1;
350         int errors; /* I_ERR_* bits, decoded by print_inode_error() */
351
352         u64 ino;
353         u32 nlink;
354         u32 imode;
355         u64 isize;
356         u64 nbytes;
357
358         u32 found_link;
359         u64 found_size;
360         u64 extent_start;
361         u64 extent_end;
362         struct rb_root holes; /* rb-tree of struct file_extent_hole */
363         struct list_head orphan_extents; /* list of struct orphan_data_extent */
364
365         u32 refs; /* set to 1 on a fresh clone, see clone_inode_rec() */
366 };
367
/*
 * Per-inode error bits stored in inode_record::errors;
 * print_inode_error() turns them into text.
 */
368 #define I_ERR_NO_INODE_ITEM             (1 << 0)
369 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
370 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
371 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
372 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
373 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
374 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
375 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
376 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
377 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
378 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
379 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
380 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
381 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
382 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
383
384 struct root_backref {
385         struct list_head list;
386         unsigned int found_dir_item:1;
387         unsigned int found_dir_index:1;
388         unsigned int found_back_ref:1;
389         unsigned int found_forward_ref:1;
390         unsigned int reachable:1;
391         int errors;
392         u64 ref_root;
393         u64 dir;
394         u64 index;
395         u16 namelen;
396         char name[0];
397 };
398
399 static inline struct root_backref* to_root_backref(struct list_head *entry)
400 {
401         return list_entry(entry, struct root_backref, list);
402 }
403
/* Bookkeeping for one root, keyed through "cache". */
404 struct root_record {
405         struct list_head backrefs; /* presumably a list of struct root_backref - confirm */
406         struct cache_extent cache;
407         unsigned int found_root_item:1;
408         u64 objectid;
409         u32 found_ref;
410 };
411
/* A cache_extent entry carrying an opaque payload pointer. */
412 struct ptr_node {
413         struct cache_extent cache;
414         void *data; /* type depends on the user; not visible here */
415 };
416
/*
 * Per-node state referenced from walk_control::nodes[].
 * NOTE(review): member roles follow their names; confirm against users.
 */
417 struct shared_node {
418         struct cache_extent cache;
419         struct cache_tree root_cache;
420         struct cache_tree inode_cache;
421         struct inode_record *current;
422         u32 refs;
423 };
424
/* A (start, size) pair describing one block. */
425 struct block_info {
426         u64 start;
427         u32 size;
428 };
429
/* State carried through a tree walk; one shared_node slot per tree level. */
430 struct walk_control {
431         struct cache_tree shared;
432         struct shared_node *nodes[BTRFS_MAX_LEVEL];
433         int active_node; /* presumably an index into nodes[] - confirm */
434         int root_level;
435 };
436
/* A list entry naming one item by key and by the id of the root it is in. */
437 struct bad_item {
438         struct btrfs_key key;
439         u64 root_id;
440         struct list_head list;
441 };
442
/*
 * A list entry describing one (bytenr, bytes) extent candidate.
 * NOTE(review): meaning of count/broken is not visible here - confirm.
 */
443 struct extent_entry {
444         u64 bytenr;
445         u64 bytes;
446         int count;
447         int broken;
448         struct list_head list;
449 };
450
/* Facts gathered about one root item, keyed through cache_extent. */
451 struct root_item_info {
452         /* level of the root */
453         u8 level;
454         /* number of nodes at this level, must be 1 for a root */
455         int node_count;
456         u64 bytenr;
457         u64 gen;
458         struct cache_extent cache_extent;
459 };
460
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag owns its own bit so that several classifications can be
 * OR-ed into one mask and still be told apart.  CROSSING_STRIPE_BOUNDARY
 * used to share (1 << 4) with REFERENCER_MISMATCH; it now uses the first
 * free bit so the other values stay unchanged.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
477
478 static void *print_status_check(void *p)
479 {
480         struct task_ctx *priv = p;
481         const char work_indicator[] = { '.', 'o', 'O', 'o' };
482         uint32_t count = 0;
483         static char *task_position_string[] = {
484                 "checking extents",
485                 "checking free space cache",
486                 "checking fs roots",
487         };
488
489         task_period_start(priv->info, 1000 /* 1s */);
490
491         if (priv->tp == TASK_NOTHING)
492                 return NULL;
493
494         while (1) {
495                 printf("%s [%c]\r", task_position_string[priv->tp],
496                                 work_indicator[count % 4]);
497                 count++;
498                 fflush(stdout);
499                 task_period_wait(priv->info);
500         }
501         return NULL;
502 }
503
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        int ret = 0;

        printf("\n");
        fflush(stdout);

        return ret;
}
511
512 static enum btrfs_check_mode parse_check_mode(const char *str)
513 {
514         if (strcmp(str, "lowmem") == 0)
515                 return CHECK_MODE_LOWMEM;
516         if (strcmp(str, "orig") == 0)
517                 return CHECK_MODE_ORIGINAL;
518         if (strcmp(str, "original") == 0)
519                 return CHECK_MODE_ORIGINAL;
520
521         return CHECK_MODE_UNKNOWN;
522 }
523
524 /* Compatible function to allow reuse of old codes */
525 static u64 first_extent_gap(struct rb_root *holes)
526 {
527         struct file_extent_hole *hole;
528
529         if (RB_EMPTY_ROOT(holes))
530                 return (u64)-1;
531
532         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
533         return hole->start;
534 }
535
536 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
537 {
538         struct file_extent_hole *hole1;
539         struct file_extent_hole *hole2;
540
541         hole1 = rb_entry(node1, struct file_extent_hole, node);
542         hole2 = rb_entry(node2, struct file_extent_hole, node);
543
544         if (hole1->start > hole2->start)
545                 return -1;
546         if (hole1->start < hole2->start)
547                 return 1;
548         /* Now hole1->start == hole2->start */
549         if (hole1->len >= hole2->len)
550                 /*
551                  * Hole 1 will be merge center
552                  * Same hole will be merged later
553                  */
554                 return -1;
555         /* Hole 2 will be merge center */
556         return 1;
557 }
558
559 /*
560  * Add a hole to the record
561  *
562  * This will do hole merge for copy_file_extent_holes(),
563  * which will ensure there won't be continuous holes.
564  */
565 static int add_file_extent_hole(struct rb_root *holes,
566                                 u64 start, u64 len)
567 {
568         struct file_extent_hole *hole;
569         struct file_extent_hole *prev = NULL;
570         struct file_extent_hole *next = NULL;
571
572         hole = malloc(sizeof(*hole));
573         if (!hole)
574                 return -ENOMEM;
575         hole->start = start;
576         hole->len = len;
577         /* Since compare will not return 0, no -EEXIST will happen */
578         rb_insert(holes, &hole->node, compare_hole);
579
580         /* simple merge with previous hole */
581         if (rb_prev(&hole->node))
582                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
583                                 node);
584         if (prev && prev->start + prev->len >= hole->start) {
585                 hole->len = hole->start + hole->len - prev->start;
586                 hole->start = prev->start;
587                 rb_erase(&prev->node, holes);
588                 free(prev);
589                 prev = NULL;
590         }
591
592         /* iterate merge with next holes */
593         while (1) {
594                 if (!rb_next(&hole->node))
595                         break;
596                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
597                                         node);
598                 if (hole->start + hole->len >= next->start) {
599                         if (hole->start + hole->len <= next->start + next->len)
600                                 hole->len = next->start + next->len -
601                                             hole->start;
602                         rb_erase(&next->node, holes);
603                         free(next);
604                         next = NULL;
605                 } else
606                         break;
607         }
608         return 0;
609 }
610
611 static int compare_hole_range(struct rb_node *node, void *data)
612 {
613         struct file_extent_hole *hole;
614         u64 start;
615
616         hole = (struct file_extent_hole *)data;
617         start = hole->start;
618
619         hole = rb_entry(node, struct file_extent_hole, node);
620         if (start < hole->start)
621                 return -1;
622         if (start >= hole->start && start < hole->start + hole->len)
623                 return 0;
624         return 1;
625 }
626
627 /*
628  * Delete a hole in the record
629  *
630  * This will do the hole split and is much restrict than add.
631  */
632 static int del_file_extent_hole(struct rb_root *holes,
633                                 u64 start, u64 len)
634 {
635         struct file_extent_hole *hole;
636         struct file_extent_hole tmp;
637         u64 prev_start = 0;
638         u64 prev_len = 0;
639         u64 next_start = 0;
640         u64 next_len = 0;
641         struct rb_node *node;
642         int have_prev = 0;
643         int have_next = 0;
644         int ret = 0;
645
646         tmp.start = start;
647         tmp.len = len;
648         node = rb_search(holes, &tmp, compare_hole_range, NULL);
649         if (!node)
650                 return -EEXIST;
651         hole = rb_entry(node, struct file_extent_hole, node);
652         if (start + len > hole->start + hole->len)
653                 return -EEXIST;
654
655         /*
656          * Now there will be no overlap, delete the hole and re-add the
657          * split(s) if they exists.
658          */
659         if (start > hole->start) {
660                 prev_start = hole->start;
661                 prev_len = start - hole->start;
662                 have_prev = 1;
663         }
664         if (hole->start + hole->len > start + len) {
665                 next_start = start + len;
666                 next_len = hole->start + hole->len - start - len;
667                 have_next = 1;
668         }
669         rb_erase(node, holes);
670         free(hole);
671         if (have_prev) {
672                 ret = add_file_extent_hole(holes, prev_start, prev_len);
673                 if (ret < 0)
674                         return ret;
675         }
676         if (have_next) {
677                 ret = add_file_extent_hole(holes, next_start, next_len);
678                 if (ret < 0)
679                         return ret;
680         }
681         return 0;
682 }
683
684 static int copy_file_extent_holes(struct rb_root *dst,
685                                   struct rb_root *src)
686 {
687         struct file_extent_hole *hole;
688         struct rb_node *node;
689         int ret = 0;
690
691         node = rb_first(src);
692         while (node) {
693                 hole = rb_entry(node, struct file_extent_hole, node);
694                 ret = add_file_extent_hole(dst, hole->start, hole->len);
695                 if (ret)
696                         break;
697                 node = rb_next(node);
698         }
699         return ret;
700 }
701
702 static void free_file_extent_holes(struct rb_root *holes)
703 {
704         struct rb_node *node;
705         struct file_extent_hole *hole;
706
707         node = rb_first(holes);
708         while (node) {
709                 hole = rb_entry(node, struct file_extent_hole, node);
710                 rb_erase(node, holes);
711                 free(hole);
712                 node = rb_first(holes);
713         }
714 }
715
716 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
717
718 static void record_root_in_trans(struct btrfs_trans_handle *trans,
719                                  struct btrfs_root *root)
720 {
721         if (root->last_trans != trans->transid) {
722                 root->track_dirty = 1;
723                 root->last_trans = trans->transid;
724                 root->commit_root = root->node;
725                 extent_buffer_get(root->node);
726         }
727 }
728
729 static u8 imode_to_type(u32 imode)
730 {
731 #define S_SHIFT 12
732         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
733                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
734                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
735                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
736                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
737                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
738                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
739                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
740         };
741
742         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
743 #undef S_SHIFT
744 }
745
746 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
747 {
748         struct device_record *rec1;
749         struct device_record *rec2;
750
751         rec1 = rb_entry(node1, struct device_record, node);
752         rec2 = rb_entry(node2, struct device_record, node);
753         if (rec1->devid > rec2->devid)
754                 return -1;
755         else if (rec1->devid < rec2->devid)
756                 return 1;
757         else
758                 return 0;
759 }
760
761 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
762 {
763         struct inode_record *rec;
764         struct inode_backref *backref;
765         struct inode_backref *orig;
766         struct inode_backref *tmp;
767         struct orphan_data_extent *src_orphan;
768         struct orphan_data_extent *dst_orphan;
769         struct rb_node *rb;
770         size_t size;
771         int ret;
772
773         rec = malloc(sizeof(*rec));
774         if (!rec)
775                 return ERR_PTR(-ENOMEM);
776         memcpy(rec, orig_rec, sizeof(*rec));
777         rec->refs = 1;
778         INIT_LIST_HEAD(&rec->backrefs);
779         INIT_LIST_HEAD(&rec->orphan_extents);
780         rec->holes = RB_ROOT;
781
782         list_for_each_entry(orig, &orig_rec->backrefs, list) {
783                 size = sizeof(*orig) + orig->namelen + 1;
784                 backref = malloc(size);
785                 if (!backref) {
786                         ret = -ENOMEM;
787                         goto cleanup;
788                 }
789                 memcpy(backref, orig, size);
790                 list_add_tail(&backref->list, &rec->backrefs);
791         }
792         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
793                 dst_orphan = malloc(sizeof(*dst_orphan));
794                 if (!dst_orphan) {
795                         ret = -ENOMEM;
796                         goto cleanup;
797                 }
798                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
799                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
800         }
801         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
802         if (ret < 0)
803                 goto cleanup_rb;
804
805         return rec;
806
807 cleanup_rb:
808         rb = rb_first(&rec->holes);
809         while (rb) {
810                 struct file_extent_hole *hole;
811
812                 hole = rb_entry(rb, struct file_extent_hole, node);
813                 rb = rb_next(rb);
814                 free(hole);
815         }
816
817 cleanup:
818         if (!list_empty(&rec->backrefs))
819                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
820                         list_del(&orig->list);
821                         free(orig);
822                 }
823
824         if (!list_empty(&rec->orphan_extents))
825                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
826                         list_del(&orig->list);
827                         free(orig);
828                 }
829
830         free(rec);
831
832         return ERR_PTR(ret);
833 }
834
835 static void print_orphan_data_extents(struct list_head *orphan_extents,
836                                       u64 objectid)
837 {
838         struct orphan_data_extent *orphan;
839
840         if (list_empty(orphan_extents))
841                 return;
842         printf("The following data extent is lost in tree %llu:\n",
843                objectid);
844         list_for_each_entry(orphan, orphan_extents, list) {
845                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
846                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
847                        orphan->disk_len);
848         }
849 }
850
851 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
852 {
853         u64 root_objectid = root->root_key.objectid;
854         int errors = rec->errors;
855
856         if (!errors)
857                 return;
858         /* reloc root errors, we print its corresponding fs root objectid*/
859         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
860                 root_objectid = root->root_key.offset;
861                 fprintf(stderr, "reloc");
862         }
863         fprintf(stderr, "root %llu inode %llu errors %x",
864                 (unsigned long long) root_objectid,
865                 (unsigned long long) rec->ino, rec->errors);
866
867         if (errors & I_ERR_NO_INODE_ITEM)
868                 fprintf(stderr, ", no inode item");
869         if (errors & I_ERR_NO_ORPHAN_ITEM)
870                 fprintf(stderr, ", no orphan item");
871         if (errors & I_ERR_DUP_INODE_ITEM)
872                 fprintf(stderr, ", dup inode item");
873         if (errors & I_ERR_DUP_DIR_INDEX)
874                 fprintf(stderr, ", dup dir index");
875         if (errors & I_ERR_ODD_DIR_ITEM)
876                 fprintf(stderr, ", odd dir item");
877         if (errors & I_ERR_ODD_FILE_EXTENT)
878                 fprintf(stderr, ", odd file extent");
879         if (errors & I_ERR_BAD_FILE_EXTENT)
880                 fprintf(stderr, ", bad file extent");
881         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
882                 fprintf(stderr, ", file extent overlap");
883         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
884                 fprintf(stderr, ", file extent discount");
885         if (errors & I_ERR_DIR_ISIZE_WRONG)
886                 fprintf(stderr, ", dir isize wrong");
887         if (errors & I_ERR_FILE_NBYTES_WRONG)
888                 fprintf(stderr, ", nbytes wrong");
889         if (errors & I_ERR_ODD_CSUM_ITEM)
890                 fprintf(stderr, ", odd csum item");
891         if (errors & I_ERR_SOME_CSUM_MISSING)
892                 fprintf(stderr, ", some csum missing");
893         if (errors & I_ERR_LINK_COUNT_WRONG)
894                 fprintf(stderr, ", link count wrong");
895         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
896                 fprintf(stderr, ", orphan file extent");
897         fprintf(stderr, "\n");
898         /* Print the orphan extents if needed */
899         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
900                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
901
902         /* Print the holes if needed */
903         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
904                 struct file_extent_hole *hole;
905                 struct rb_node *node;
906                 int found = 0;
907
908                 node = rb_first(&rec->holes);
909                 fprintf(stderr, "Found file extent holes:\n");
910                 while (node) {
911                         found = 1;
912                         hole = rb_entry(node, struct file_extent_hole, node);
913                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
914                                 hole->start, hole->len);
915                         node = rb_next(node);
916                 }
917                 if (!found)
918                         fprintf(stderr, "\tstart: 0, len: %llu\n",
919                                 round_up(rec->isize,
920                                          root->fs_info->sectorsize));
921         }
922 }
923
924 static void print_ref_error(int errors)
925 {
926         if (errors & REF_ERR_NO_DIR_ITEM)
927                 fprintf(stderr, ", no dir item");
928         if (errors & REF_ERR_NO_DIR_INDEX)
929                 fprintf(stderr, ", no dir index");
930         if (errors & REF_ERR_NO_INODE_REF)
931                 fprintf(stderr, ", no inode ref");
932         if (errors & REF_ERR_DUP_DIR_ITEM)
933                 fprintf(stderr, ", dup dir item");
934         if (errors & REF_ERR_DUP_DIR_INDEX)
935                 fprintf(stderr, ", dup dir index");
936         if (errors & REF_ERR_DUP_INODE_REF)
937                 fprintf(stderr, ", dup inode ref");
938         if (errors & REF_ERR_INDEX_UNMATCH)
939                 fprintf(stderr, ", index mismatch");
940         if (errors & REF_ERR_FILETYPE_UNMATCH)
941                 fprintf(stderr, ", filetype mismatch");
942         if (errors & REF_ERR_NAME_TOO_LONG)
943                 fprintf(stderr, ", name too long");
944         if (errors & REF_ERR_NO_ROOT_REF)
945                 fprintf(stderr, ", no root ref");
946         if (errors & REF_ERR_NO_ROOT_BACKREF)
947                 fprintf(stderr, ", no root backref");
948         if (errors & REF_ERR_DUP_ROOT_REF)
949                 fprintf(stderr, ", dup root ref");
950         if (errors & REF_ERR_DUP_ROOT_BACKREF)
951                 fprintf(stderr, ", dup root backref");
952         fprintf(stderr, "\n");
953 }
954
955 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
956                                           u64 ino, int mod)
957 {
958         struct ptr_node *node;
959         struct cache_extent *cache;
960         struct inode_record *rec = NULL;
961         int ret;
962
963         cache = lookup_cache_extent(inode_cache, ino, 1);
964         if (cache) {
965                 node = container_of(cache, struct ptr_node, cache);
966                 rec = node->data;
967                 if (mod && rec->refs > 1) {
968                         node->data = clone_inode_rec(rec);
969                         if (IS_ERR(node->data))
970                                 return node->data;
971                         rec->refs--;
972                         rec = node->data;
973                 }
974         } else if (mod) {
975                 rec = calloc(1, sizeof(*rec));
976                 if (!rec)
977                         return ERR_PTR(-ENOMEM);
978                 rec->ino = ino;
979                 rec->extent_start = (u64)-1;
980                 rec->refs = 1;
981                 INIT_LIST_HEAD(&rec->backrefs);
982                 INIT_LIST_HEAD(&rec->orphan_extents);
983                 rec->holes = RB_ROOT;
984
985                 node = malloc(sizeof(*node));
986                 if (!node) {
987                         free(rec);
988                         return ERR_PTR(-ENOMEM);
989                 }
990                 node->cache.start = ino;
991                 node->cache.size = 1;
992                 node->data = rec;
993
994                 if (ino == BTRFS_FREE_INO_OBJECTID)
995                         rec->found_link = 1;
996
997                 ret = insert_cache_extent(inode_cache, &node->cache);
998                 if (ret)
999                         return ERR_PTR(-EEXIST);
1000         }
1001         return rec;
1002 }
1003
1004 static void free_orphan_data_extents(struct list_head *orphan_extents)
1005 {
1006         struct orphan_data_extent *orphan;
1007
1008         while (!list_empty(orphan_extents)) {
1009                 orphan = list_entry(orphan_extents->next,
1010                                     struct orphan_data_extent, list);
1011                 list_del(&orphan->list);
1012                 free(orphan);
1013         }
1014 }
1015
1016 static void free_inode_rec(struct inode_record *rec)
1017 {
1018         struct inode_backref *backref;
1019
1020         if (--rec->refs > 0)
1021                 return;
1022
1023         while (!list_empty(&rec->backrefs)) {
1024                 backref = to_inode_backref(rec->backrefs.next);
1025                 list_del(&backref->list);
1026                 free(backref);
1027         }
1028         free_orphan_data_extents(&rec->orphan_extents);
1029         free_file_extent_holes(&rec->holes);
1030         free(rec);
1031 }
1032
1033 static int can_free_inode_rec(struct inode_record *rec)
1034 {
1035         if (!rec->errors && rec->checked && rec->found_inode_item &&
1036             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1037                 return 1;
1038         return 0;
1039 }
1040
1041 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1042                                  struct inode_record *rec)
1043 {
1044         struct cache_extent *cache;
1045         struct inode_backref *tmp, *backref;
1046         struct ptr_node *node;
1047         u8 filetype;
1048
1049         if (!rec->found_inode_item)
1050                 return;
1051
1052         filetype = imode_to_type(rec->imode);
1053         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1054                 if (backref->found_dir_item && backref->found_dir_index) {
1055                         if (backref->filetype != filetype)
1056                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1057                         if (!backref->errors && backref->found_inode_ref &&
1058                             rec->nlink == rec->found_link) {
1059                                 list_del(&backref->list);
1060                                 free(backref);
1061                         }
1062                 }
1063         }
1064
1065         if (!rec->checked || rec->merging)
1066                 return;
1067
1068         if (S_ISDIR(rec->imode)) {
1069                 if (rec->found_size != rec->isize)
1070                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1071                 if (rec->found_file_extent)
1072                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1073         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1074                 if (rec->found_dir_item)
1075                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1076                 if (rec->found_size != rec->nbytes)
1077                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1078                 if (rec->nlink > 0 && !no_holes &&
1079                     (rec->extent_end < rec->isize ||
1080                      first_extent_gap(&rec->holes) < rec->isize))
1081                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1082         }
1083
1084         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1085                 if (rec->found_csum_item && rec->nodatasum)
1086                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1087                 if (rec->some_csum_missing && !rec->nodatasum)
1088                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1089         }
1090
1091         BUG_ON(rec->refs != 1);
1092         if (can_free_inode_rec(rec)) {
1093                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1094                 node = container_of(cache, struct ptr_node, cache);
1095                 BUG_ON(node->data != rec);
1096                 remove_cache_extent(inode_cache, &node->cache);
1097                 free(node);
1098                 free_inode_rec(rec);
1099         }
1100 }
1101
1102 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1103 {
1104         struct btrfs_path path;
1105         struct btrfs_key key;
1106         int ret;
1107
1108         key.objectid = BTRFS_ORPHAN_OBJECTID;
1109         key.type = BTRFS_ORPHAN_ITEM_KEY;
1110         key.offset = ino;
1111
1112         btrfs_init_path(&path);
1113         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1114         btrfs_release_path(&path);
1115         if (ret > 0)
1116                 ret = -ENOENT;
1117         return ret;
1118 }
1119
1120 static int process_inode_item(struct extent_buffer *eb,
1121                               int slot, struct btrfs_key *key,
1122                               struct shared_node *active_node)
1123 {
1124         struct inode_record *rec;
1125         struct btrfs_inode_item *item;
1126
1127         rec = active_node->current;
1128         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1129         if (rec->found_inode_item) {
1130                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1131                 return 1;
1132         }
1133         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1134         rec->nlink = btrfs_inode_nlink(eb, item);
1135         rec->isize = btrfs_inode_size(eb, item);
1136         rec->nbytes = btrfs_inode_nbytes(eb, item);
1137         rec->imode = btrfs_inode_mode(eb, item);
1138         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1139                 rec->nodatasum = 1;
1140         rec->found_inode_item = 1;
1141         if (rec->nlink == 0)
1142                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1143         maybe_free_inode_rec(&active_node->inode_cache, rec);
1144         return 0;
1145 }
1146
1147 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1148                                                 const char *name,
1149                                                 int namelen, u64 dir)
1150 {
1151         struct inode_backref *backref;
1152
1153         list_for_each_entry(backref, &rec->backrefs, list) {
1154                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1155                         break;
1156                 if (backref->dir != dir || backref->namelen != namelen)
1157                         continue;
1158                 if (memcmp(name, backref->name, namelen))
1159                         continue;
1160                 return backref;
1161         }
1162
1163         backref = malloc(sizeof(*backref) + namelen + 1);
1164         if (!backref)
1165                 return NULL;
1166         memset(backref, 0, sizeof(*backref));
1167         backref->dir = dir;
1168         backref->namelen = namelen;
1169         memcpy(backref->name, name, namelen);
1170         backref->name[namelen] = '\0';
1171         list_add_tail(&backref->list, &rec->backrefs);
1172         return backref;
1173 }
1174
1175 static int add_inode_backref(struct cache_tree *inode_cache,
1176                              u64 ino, u64 dir, u64 index,
1177                              const char *name, int namelen,
1178                              u8 filetype, u8 itemtype, int errors)
1179 {
1180         struct inode_record *rec;
1181         struct inode_backref *backref;
1182
1183         rec = get_inode_rec(inode_cache, ino, 1);
1184         BUG_ON(IS_ERR(rec));
1185         backref = get_inode_backref(rec, name, namelen, dir);
1186         BUG_ON(!backref);
1187         if (errors)
1188                 backref->errors |= errors;
1189         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1190                 if (backref->found_dir_index)
1191                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1192                 if (backref->found_inode_ref && backref->index != index)
1193                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1194                 if (backref->found_dir_item && backref->filetype != filetype)
1195                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1196
1197                 backref->index = index;
1198                 backref->filetype = filetype;
1199                 backref->found_dir_index = 1;
1200         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1201                 rec->found_link++;
1202                 if (backref->found_dir_item)
1203                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1204                 if (backref->found_dir_index && backref->filetype != filetype)
1205                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1206
1207                 backref->filetype = filetype;
1208                 backref->found_dir_item = 1;
1209         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1210                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1211                 if (backref->found_inode_ref)
1212                         backref->errors |= REF_ERR_DUP_INODE_REF;
1213                 if (backref->found_dir_index && backref->index != index)
1214                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1215                 else
1216                         backref->index = index;
1217
1218                 backref->ref_type = itemtype;
1219                 backref->found_inode_ref = 1;
1220         } else {
1221                 BUG_ON(1);
1222         }
1223
1224         maybe_free_inode_rec(inode_cache, rec);
1225         return 0;
1226 }
1227
1228 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1229                             struct cache_tree *dst_cache)
1230 {
1231         struct inode_backref *backref;
1232         u32 dir_count = 0;
1233         int ret = 0;
1234
1235         dst->merging = 1;
1236         list_for_each_entry(backref, &src->backrefs, list) {
1237                 if (backref->found_dir_index) {
1238                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1239                                         backref->index, backref->name,
1240                                         backref->namelen, backref->filetype,
1241                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1242                 }
1243                 if (backref->found_dir_item) {
1244                         dir_count++;
1245                         add_inode_backref(dst_cache, dst->ino,
1246                                         backref->dir, 0, backref->name,
1247                                         backref->namelen, backref->filetype,
1248                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1249                 }
1250                 if (backref->found_inode_ref) {
1251                         add_inode_backref(dst_cache, dst->ino,
1252                                         backref->dir, backref->index,
1253                                         backref->name, backref->namelen, 0,
1254                                         backref->ref_type, backref->errors);
1255                 }
1256         }
1257
1258         if (src->found_dir_item)
1259                 dst->found_dir_item = 1;
1260         if (src->found_file_extent)
1261                 dst->found_file_extent = 1;
1262         if (src->found_csum_item)
1263                 dst->found_csum_item = 1;
1264         if (src->some_csum_missing)
1265                 dst->some_csum_missing = 1;
1266         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1267                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1268                 if (ret < 0)
1269                         return ret;
1270         }
1271
1272         BUG_ON(src->found_link < dir_count);
1273         dst->found_link += src->found_link - dir_count;
1274         dst->found_size += src->found_size;
1275         if (src->extent_start != (u64)-1) {
1276                 if (dst->extent_start == (u64)-1) {
1277                         dst->extent_start = src->extent_start;
1278                         dst->extent_end = src->extent_end;
1279                 } else {
1280                         if (dst->extent_end > src->extent_start)
1281                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1282                         else if (dst->extent_end < src->extent_start) {
1283                                 ret = add_file_extent_hole(&dst->holes,
1284                                         dst->extent_end,
1285                                         src->extent_start - dst->extent_end);
1286                         }
1287                         if (dst->extent_end < src->extent_end)
1288                                 dst->extent_end = src->extent_end;
1289                 }
1290         }
1291
1292         dst->errors |= src->errors;
1293         if (src->found_inode_item) {
1294                 if (!dst->found_inode_item) {
1295                         dst->nlink = src->nlink;
1296                         dst->isize = src->isize;
1297                         dst->nbytes = src->nbytes;
1298                         dst->imode = src->imode;
1299                         dst->nodatasum = src->nodatasum;
1300                         dst->found_inode_item = 1;
1301                 } else {
1302                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1303                 }
1304         }
1305         dst->merging = 0;
1306
1307         return 0;
1308 }
1309
1310 static int splice_shared_node(struct shared_node *src_node,
1311                               struct shared_node *dst_node)
1312 {
1313         struct cache_extent *cache;
1314         struct ptr_node *node, *ins;
1315         struct cache_tree *src, *dst;
1316         struct inode_record *rec, *conflict;
1317         u64 current_ino = 0;
1318         int splice = 0;
1319         int ret;
1320
1321         if (--src_node->refs == 0)
1322                 splice = 1;
1323         if (src_node->current)
1324                 current_ino = src_node->current->ino;
1325
1326         src = &src_node->root_cache;
1327         dst = &dst_node->root_cache;
1328 again:
1329         cache = search_cache_extent(src, 0);
1330         while (cache) {
1331                 node = container_of(cache, struct ptr_node, cache);
1332                 rec = node->data;
1333                 cache = next_cache_extent(cache);
1334
1335                 if (splice) {
1336                         remove_cache_extent(src, &node->cache);
1337                         ins = node;
1338                 } else {
1339                         ins = malloc(sizeof(*ins));
1340                         BUG_ON(!ins);
1341                         ins->cache.start = node->cache.start;
1342                         ins->cache.size = node->cache.size;
1343                         ins->data = rec;
1344                         rec->refs++;
1345                 }
1346                 ret = insert_cache_extent(dst, &ins->cache);
1347                 if (ret == -EEXIST) {
1348                         conflict = get_inode_rec(dst, rec->ino, 1);
1349                         BUG_ON(IS_ERR(conflict));
1350                         merge_inode_recs(rec, conflict, dst);
1351                         if (rec->checked) {
1352                                 conflict->checked = 1;
1353                                 if (dst_node->current == conflict)
1354                                         dst_node->current = NULL;
1355                         }
1356                         maybe_free_inode_rec(dst, conflict);
1357                         free_inode_rec(rec);
1358                         free(ins);
1359                 } else {
1360                         BUG_ON(ret);
1361                 }
1362         }
1363
1364         if (src == &src_node->root_cache) {
1365                 src = &src_node->inode_cache;
1366                 dst = &dst_node->inode_cache;
1367                 goto again;
1368         }
1369
1370         if (current_ino > 0 && (!dst_node->current ||
1371             current_ino > dst_node->current->ino)) {
1372                 if (dst_node->current) {
1373                         dst_node->current->checked = 1;
1374                         maybe_free_inode_rec(dst, dst_node->current);
1375                 }
1376                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1377                 BUG_ON(IS_ERR(dst_node->current));
1378         }
1379         return 0;
1380 }
1381
1382 static void free_inode_ptr(struct cache_extent *cache)
1383 {
1384         struct ptr_node *node;
1385         struct inode_record *rec;
1386
1387         node = container_of(cache, struct ptr_node, cache);
1388         rec = node->data;
1389         free_inode_rec(rec);
1390         free(node);
1391 }
1392
1393 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1394
1395 static struct shared_node *find_shared_node(struct cache_tree *shared,
1396                                             u64 bytenr)
1397 {
1398         struct cache_extent *cache;
1399         struct shared_node *node;
1400
1401         cache = lookup_cache_extent(shared, bytenr, 1);
1402         if (cache) {
1403                 node = container_of(cache, struct shared_node, cache);
1404                 return node;
1405         }
1406         return NULL;
1407 }
1408
1409 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1410 {
1411         int ret;
1412         struct shared_node *node;
1413
1414         node = calloc(1, sizeof(*node));
1415         if (!node)
1416                 return -ENOMEM;
1417         node->cache.start = bytenr;
1418         node->cache.size = 1;
1419         cache_tree_init(&node->root_cache);
1420         cache_tree_init(&node->inode_cache);
1421         node->refs = refs;
1422
1423         ret = insert_cache_extent(shared, &node->cache);
1424
1425         return ret;
1426 }
1427
1428 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1429                              struct walk_control *wc, int level)
1430 {
1431         struct shared_node *node;
1432         struct shared_node *dest;
1433         int ret;
1434
1435         if (level == wc->active_node)
1436                 return 0;
1437
1438         BUG_ON(wc->active_node <= level);
1439         node = find_shared_node(&wc->shared, bytenr);
1440         if (!node) {
1441                 ret = add_shared_node(&wc->shared, bytenr, refs);
1442                 BUG_ON(ret);
1443                 node = find_shared_node(&wc->shared, bytenr);
1444                 wc->nodes[level] = node;
1445                 wc->active_node = level;
1446                 return 0;
1447         }
1448
1449         if (wc->root_level == wc->active_node &&
1450             btrfs_root_refs(&root->root_item) == 0) {
1451                 if (--node->refs == 0) {
1452                         free_inode_recs_tree(&node->root_cache);
1453                         free_inode_recs_tree(&node->inode_cache);
1454                         remove_cache_extent(&wc->shared, &node->cache);
1455                         free(node);
1456                 }
1457                 return 1;
1458         }
1459
1460         dest = wc->nodes[wc->active_node];
1461         splice_shared_node(node, dest);
1462         if (node->refs == 0) {
1463                 remove_cache_extent(&wc->shared, &node->cache);
1464                 free(node);
1465         }
1466         return 1;
1467 }
1468
1469 static int leave_shared_node(struct btrfs_root *root,
1470                              struct walk_control *wc, int level)
1471 {
1472         struct shared_node *node;
1473         struct shared_node *dest;
1474         int i;
1475
1476         if (level == wc->root_level)
1477                 return 0;
1478
1479         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1480                 if (wc->nodes[i])
1481                         break;
1482         }
1483         BUG_ON(i >= BTRFS_MAX_LEVEL);
1484
1485         node = wc->nodes[wc->active_node];
1486         wc->nodes[wc->active_node] = NULL;
1487         wc->active_node = i;
1488
1489         dest = wc->nodes[wc->active_node];
1490         if (wc->active_node < wc->root_level ||
1491             btrfs_root_refs(&root->root_item) > 0) {
1492                 BUG_ON(node->refs <= 1);
1493                 splice_shared_node(node, dest);
1494         } else {
1495                 BUG_ON(node->refs < 2);
1496                 node->refs--;
1497         }
1498         return 0;
1499 }
1500
1501 /*
1502  * Returns:
1503  * < 0 - on error
1504  * 1   - if the root with id child_root_id is a child of root parent_root_id
1505  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1506  *       has other root(s) as parent(s)
1507  * 2   - if the root child_root_id doesn't have any parent roots
1508  */
1509 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1510                          u64 child_root_id)
1511 {
1512         struct btrfs_path path;
1513         struct btrfs_key key;
1514         struct extent_buffer *leaf;
1515         int has_parent = 0;
1516         int ret;
1517
1518         btrfs_init_path(&path);
1519
1520         key.objectid = parent_root_id;
1521         key.type = BTRFS_ROOT_REF_KEY;
1522         key.offset = child_root_id;
1523         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1524                                 0, 0);
1525         if (ret < 0)
1526                 return ret;
1527         btrfs_release_path(&path);
1528         if (!ret)
1529                 return 1;
1530
1531         key.objectid = child_root_id;
1532         key.type = BTRFS_ROOT_BACKREF_KEY;
1533         key.offset = 0;
1534         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1535                                 0, 0);
1536         if (ret < 0)
1537                 goto out;
1538
1539         while (1) {
1540                 leaf = path.nodes[0];
1541                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1542                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1543                         if (ret)
1544                                 break;
1545                         leaf = path.nodes[0];
1546                 }
1547
1548                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1549                 if (key.objectid != child_root_id ||
1550                     key.type != BTRFS_ROOT_BACKREF_KEY)
1551                         break;
1552
1553                 has_parent = 1;
1554
1555                 if (key.offset == parent_root_id) {
1556                         btrfs_release_path(&path);
1557                         return 1;
1558                 }
1559
1560                 path.slots[0]++;
1561         }
1562 out:
1563         btrfs_release_path(&path);
1564         if (ret < 0)
1565                 return ret;
1566         return has_parent ? 0 : 2;
1567 }
1568
1569 static int process_dir_item(struct extent_buffer *eb,
1570                             int slot, struct btrfs_key *key,
1571                             struct shared_node *active_node)
1572 {
1573         u32 total;
1574         u32 cur = 0;
1575         u32 len;
1576         u32 name_len;
1577         u32 data_len;
1578         int error;
1579         int nritems = 0;
1580         u8 filetype;
1581         struct btrfs_dir_item *di;
1582         struct inode_record *rec;
1583         struct cache_tree *root_cache;
1584         struct cache_tree *inode_cache;
1585         struct btrfs_key location;
1586         char namebuf[BTRFS_NAME_LEN];
1587
1588         root_cache = &active_node->root_cache;
1589         inode_cache = &active_node->inode_cache;
1590         rec = active_node->current;
1591         rec->found_dir_item = 1;
1592
1593         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1594         total = btrfs_item_size_nr(eb, slot);
1595         while (cur < total) {
1596                 nritems++;
1597                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1598                 name_len = btrfs_dir_name_len(eb, di);
1599                 data_len = btrfs_dir_data_len(eb, di);
1600                 filetype = btrfs_dir_type(eb, di);
1601
1602                 rec->found_size += name_len;
1603                 if (cur + sizeof(*di) + name_len > total ||
1604                     name_len > BTRFS_NAME_LEN) {
1605                         error = REF_ERR_NAME_TOO_LONG;
1606
1607                         if (cur + sizeof(*di) > total)
1608                                 break;
1609                         len = min_t(u32, total - cur - sizeof(*di),
1610                                     BTRFS_NAME_LEN);
1611                 } else {
1612                         len = name_len;
1613                         error = 0;
1614                 }
1615
1616                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1617
1618                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1619                     key->offset != btrfs_name_hash(namebuf, len)) {
1620                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1621                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1622                         key->objectid, key->offset, namebuf, len, filetype,
1623                         key->offset, btrfs_name_hash(namebuf, len));
1624                 }
1625
1626                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1627                         add_inode_backref(inode_cache, location.objectid,
1628                                           key->objectid, key->offset, namebuf,
1629                                           len, filetype, key->type, error);
1630                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1631                         add_inode_backref(root_cache, location.objectid,
1632                                           key->objectid, key->offset,
1633                                           namebuf, len, filetype,
1634                                           key->type, error);
1635                 } else {
1636                         fprintf(stderr, "invalid location in dir item %u\n",
1637                                 location.type);
1638                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1639                                           key->objectid, key->offset, namebuf,
1640                                           len, filetype, key->type, error);
1641                 }
1642
1643                 len = sizeof(*di) + name_len + data_len;
1644                 di = (struct btrfs_dir_item *)((char *)di + len);
1645                 cur += len;
1646         }
1647         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1648                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1649
1650         return 0;
1651 }
1652
1653 static int process_inode_ref(struct extent_buffer *eb,
1654                              int slot, struct btrfs_key *key,
1655                              struct shared_node *active_node)
1656 {
1657         u32 total;
1658         u32 cur = 0;
1659         u32 len;
1660         u32 name_len;
1661         u64 index;
1662         int error;
1663         struct cache_tree *inode_cache;
1664         struct btrfs_inode_ref *ref;
1665         char namebuf[BTRFS_NAME_LEN];
1666
1667         inode_cache = &active_node->inode_cache;
1668
1669         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1670         total = btrfs_item_size_nr(eb, slot);
1671         while (cur < total) {
1672                 name_len = btrfs_inode_ref_name_len(eb, ref);
1673                 index = btrfs_inode_ref_index(eb, ref);
1674
1675                 /* inode_ref + namelen should not cross item boundary */
1676                 if (cur + sizeof(*ref) + name_len > total ||
1677                     name_len > BTRFS_NAME_LEN) {
1678                         if (total < cur + sizeof(*ref))
1679                                 break;
1680
1681                         /* Still try to read out the remaining part */
1682                         len = min_t(u32, total - cur - sizeof(*ref),
1683                                     BTRFS_NAME_LEN);
1684                         error = REF_ERR_NAME_TOO_LONG;
1685                 } else {
1686                         len = name_len;
1687                         error = 0;
1688                 }
1689
1690                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1691                 add_inode_backref(inode_cache, key->objectid, key->offset,
1692                                   index, namebuf, len, 0, key->type, error);
1693
1694                 len = sizeof(*ref) + name_len;
1695                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1696                 cur += len;
1697         }
1698         return 0;
1699 }
1700
1701 static int process_inode_extref(struct extent_buffer *eb,
1702                                 int slot, struct btrfs_key *key,
1703                                 struct shared_node *active_node)
1704 {
1705         u32 total;
1706         u32 cur = 0;
1707         u32 len;
1708         u32 name_len;
1709         u64 index;
1710         u64 parent;
1711         int error;
1712         struct cache_tree *inode_cache;
1713         struct btrfs_inode_extref *extref;
1714         char namebuf[BTRFS_NAME_LEN];
1715
1716         inode_cache = &active_node->inode_cache;
1717
1718         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1719         total = btrfs_item_size_nr(eb, slot);
1720         while (cur < total) {
1721                 name_len = btrfs_inode_extref_name_len(eb, extref);
1722                 index = btrfs_inode_extref_index(eb, extref);
1723                 parent = btrfs_inode_extref_parent(eb, extref);
1724                 if (name_len <= BTRFS_NAME_LEN) {
1725                         len = name_len;
1726                         error = 0;
1727                 } else {
1728                         len = BTRFS_NAME_LEN;
1729                         error = REF_ERR_NAME_TOO_LONG;
1730                 }
1731                 read_extent_buffer(eb, namebuf,
1732                                    (unsigned long)(extref + 1), len);
1733                 add_inode_backref(inode_cache, key->objectid, parent,
1734                                   index, namebuf, len, 0, key->type, error);
1735
1736                 len = sizeof(*extref) + name_len;
1737                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1738                 cur += len;
1739         }
1740         return 0;
1741
1742 }
1743
1744 static int count_csum_range(struct btrfs_root *root, u64 start,
1745                             u64 len, u64 *found)
1746 {
1747         struct btrfs_key key;
1748         struct btrfs_path path;
1749         struct extent_buffer *leaf;
1750         int ret;
1751         size_t size;
1752         *found = 0;
1753         u64 csum_end;
1754         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1755
1756         btrfs_init_path(&path);
1757
1758         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1759         key.offset = start;
1760         key.type = BTRFS_EXTENT_CSUM_KEY;
1761
1762         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1763                                 &key, &path, 0, 0);
1764         if (ret < 0)
1765                 goto out;
1766         if (ret > 0 && path.slots[0] > 0) {
1767                 leaf = path.nodes[0];
1768                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1769                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1770                     key.type == BTRFS_EXTENT_CSUM_KEY)
1771                         path.slots[0]--;
1772         }
1773
1774         while (len > 0) {
1775                 leaf = path.nodes[0];
1776                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1777                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1778                         if (ret > 0)
1779                                 break;
1780                         else if (ret < 0)
1781                                 goto out;
1782                         leaf = path.nodes[0];
1783                 }
1784
1785                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1786                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1787                     key.type != BTRFS_EXTENT_CSUM_KEY)
1788                         break;
1789
1790                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1791                 if (key.offset >= start + len)
1792                         break;
1793
1794                 if (key.offset > start)
1795                         start = key.offset;
1796
1797                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1798                 csum_end = key.offset + (size / csum_size) *
1799                            root->fs_info->sectorsize;
1800                 if (csum_end > start) {
1801                         size = min(csum_end - start, len);
1802                         len -= size;
1803                         start += size;
1804                         *found += size;
1805                 }
1806
1807                 path.slots[0]++;
1808         }
1809 out:
1810         btrfs_release_path(&path);
1811         if (ret < 0)
1812                 return ret;
1813         return 0;
1814 }
1815
/*
 * Account one EXTENT_DATA item against the inode record currently being
 * built in @active_node.
 *
 * @root:        root used for the checksum lookup and for the sector size
 * @eb:          leaf holding the item
 * @slot:        slot of the EXTENT_DATA item inside @eb
 * @key:         key of the item; key->offset is the file offset
 * @active_node: shared node whose ->current inode record is updated
 *
 * Updates the record's extent range, holes, found_size, csum flags and
 * error bits.  Returns 0 on success, or the negative error returned by
 * add_file_extent_hole() / count_csum_range().
 */
static int process_file_extent(struct btrfs_root *root,
                                struct extent_buffer *eb,
                                int slot, struct btrfs_key *key,
                                struct shared_node *active_node)
{
        struct inode_record *rec;
        struct btrfs_file_extent_item *fi;
        u64 num_bytes = 0;
        u64 disk_bytenr = 0;
        u64 extent_offset = 0;
        /* Used below to test/round sizes against the sector size */
        u64 mask = root->fs_info->sectorsize - 1;
        int extent_type;
        int ret;

        rec = active_node->current;
        BUG_ON(rec->ino != key->objectid || rec->refs > 1);
        rec->found_file_extent = 1;

        /* (u64)-1 marks "no extent seen yet": start tracking at this one */
        if (rec->extent_start == (u64)-1) {
                rec->extent_start = key->offset;
                rec->extent_end = key->offset;
        }

        /*
         * Compare against the end of the previous extent: starting before
         * it is an overlap, starting after it leaves a hole to record.
         */
        if (rec->extent_end > key->offset)
                rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
        else if (rec->extent_end < key->offset) {
                ret = add_file_extent_hole(&rec->holes, rec->extent_end,
                                           key->offset - rec->extent_end);
                if (ret < 0)
                        return ret;
        }

        fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
        extent_type = btrfs_file_extent_type(eb, fi);

        if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
                num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
                if (num_bytes == 0)
                        rec->errors |= I_ERR_BAD_FILE_EXTENT;
                rec->found_size += num_bytes;
                /* Round up to a sector multiple for the extent_end update */
                num_bytes = (num_bytes + mask) & ~mask;
        } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
                   extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
                num_bytes = btrfs_file_extent_num_bytes(eb, fi);
                disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
                extent_offset = btrfs_file_extent_offset(eb, fi);
                /* Length must be non-zero and sector aligned */
                if (num_bytes == 0 || (num_bytes & mask))
                        rec->errors |= I_ERR_BAD_FILE_EXTENT;
                /* The referenced slice must fit inside ram_bytes */
                if (num_bytes + extent_offset >
                    btrfs_file_extent_ram_bytes(eb, fi))
                        rec->errors |= I_ERR_BAD_FILE_EXTENT;
                /* Preallocated extents must not carry any encoding */
                if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
                    (btrfs_file_extent_compression(eb, fi) ||
                     btrfs_file_extent_encryption(eb, fi) ||
                     btrfs_file_extent_other_encoding(eb, fi)))
                        rec->errors |= I_ERR_BAD_FILE_EXTENT;
                /* disk_bytenr == 0 is not counted towards found_size */
                if (disk_bytenr > 0)
                        rec->found_size += num_bytes;
        } else {
                /* Unknown extent type */
                rec->errors |= I_ERR_BAD_FILE_EXTENT;
        }
        rec->extent_end = key->offset + num_bytes;

        /*
         * The data reloc tree will copy full extents into its inode and then
         * copy the corresponding csums.  Because the extent it copied could be
         * a preallocated extent that hasn't been written to yet there may be no
         * csums to copy, ergo we won't have csums for our file extent.  This is
         * ok so just don't bother checking csums if the inode belongs to the
         * data reloc tree.
         */
        if (disk_bytenr > 0 &&
            btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
                u64 found;
                /*
                 * Compressed: look up csums for the whole on-disk extent.
                 * Otherwise: look them up for the referenced slice only.
                 */
                if (btrfs_file_extent_compression(eb, fi))
                        num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
                else
                        disk_bytenr += extent_offset;

                ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
                if (ret < 0)
                        return ret;
                if (extent_type == BTRFS_FILE_EXTENT_REG) {
                        if (found > 0)
                                rec->found_csum_item = 1;
                        if (found < num_bytes)
                                rec->some_csum_missing = 1;
                } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
                        /* Any csum found for a preallocated extent is flagged */
                        if (found > 0)
                                rec->errors |= I_ERR_ODD_CSUM_ITEM;
                }
        }
        return 0;
}
1910
1911 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1912                             struct walk_control *wc)
1913 {
1914         struct btrfs_key key;
1915         u32 nritems;
1916         int i;
1917         int ret = 0;
1918         struct cache_tree *inode_cache;
1919         struct shared_node *active_node;
1920
1921         if (wc->root_level == wc->active_node &&
1922             btrfs_root_refs(&root->root_item) == 0)
1923                 return 0;
1924
1925         active_node = wc->nodes[wc->active_node];
1926         inode_cache = &active_node->inode_cache;
1927         nritems = btrfs_header_nritems(eb);
1928         for (i = 0; i < nritems; i++) {
1929                 btrfs_item_key_to_cpu(eb, &key, i);
1930
1931                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1932                         continue;
1933                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1934                         continue;
1935
1936                 if (active_node->current == NULL ||
1937                     active_node->current->ino < key.objectid) {
1938                         if (active_node->current) {
1939                                 active_node->current->checked = 1;
1940                                 maybe_free_inode_rec(inode_cache,
1941                                                      active_node->current);
1942                         }
1943                         active_node->current = get_inode_rec(inode_cache,
1944                                                              key.objectid, 1);
1945                         BUG_ON(IS_ERR(active_node->current));
1946                 }
1947                 switch (key.type) {
1948                 case BTRFS_DIR_ITEM_KEY:
1949                 case BTRFS_DIR_INDEX_KEY:
1950                         ret = process_dir_item(eb, i, &key, active_node);
1951                         break;
1952                 case BTRFS_INODE_REF_KEY:
1953                         ret = process_inode_ref(eb, i, &key, active_node);
1954                         break;
1955                 case BTRFS_INODE_EXTREF_KEY:
1956                         ret = process_inode_extref(eb, i, &key, active_node);
1957                         break;
1958                 case BTRFS_INODE_ITEM_KEY:
1959                         ret = process_inode_item(eb, i, &key, active_node);
1960                         break;
1961                 case BTRFS_EXTENT_DATA_KEY:
1962                         ret = process_file_extent(root, eb, i, &key,
1963                                                   active_node);
1964                         break;
1965                 default:
1966                         break;
1967                 };
1968         }
1969         return ret;
1970 }
1971
/*
 * Per-level cache of the tree block most recently looked up while walking
 * a tree.  All three arrays are indexed by tree level.
 */
struct node_refs {
        /* start bytenr of the block the cached values belong to */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count obtained from btrfs_lookup_extent_info() */
        u64 refs[BTRFS_MAX_LEVEL];
        /* non-zero when the block should be checked for the current root */
        int need_check[BTRFS_MAX_LEVEL];
};
1977
/*
 * Forward declarations: process_one_leaf_v2() below calls both of these
 * ahead of their definitions.
 */
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
                             struct node_refs *nrefs, u64 level);
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
1982
/*
 * Check the inodes found in the leaf at path->nodes[0] by calling
 * check_inode_item() repeatedly, then refresh the cached node refs once the
 * path has moved on to another leaf.
 *
 * @root:    root being checked
 * @path:    path whose level-0 node is the leaf to process; it is advanced
 *           by check_inode_item()
 * @nrefs:   per-level reference cache, updated via update_nodes_refs()
 * @level:   in/out; may be raised when the walk lands on a block that does
 *           not need checking
 * @ext_ref: forwarded to check_inode_item()
 *
 * Returns >0  Found error, not fatal, should continue
 * Returns <0  Fatal error, must exit the whole check
 * Returns 0   No errors found
 */
static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
                               struct node_refs *nrefs, int *level, int ext_ref)
{
        struct extent_buffer *cur = path->nodes[0];
        struct btrfs_key key;
        u64 cur_bytenr;
        u32 nritems;
        u64 first_ino = 0;
        int root_level = btrfs_header_level(root->node);
        int i;
        int ret = 0; /* Final return value */
        int err = 0; /* Positive error bitmap */

        /* Remember which leaf we started on to detect a leaf switch later */
        cur_bytenr = cur->start;

        /* skip to first inode item or the first inode number change */
        nritems = btrfs_header_nritems(cur);
        for (i = 0; i < nritems; i++) {
                btrfs_item_key_to_cpu(cur, &key, i);
                if (i == 0)
                        first_ino = key.objectid;
                if (key.type == BTRFS_INODE_ITEM_KEY ||
                    (first_ino && first_ino != key.objectid))
                        break;
        }
        /* No starting point in this leaf: mark it as fully consumed */
        if (i == nritems) {
                path->slots[0] = nritems;
                return 0;
        }
        path->slots[0] = i;

again:
        err |= check_inode_item(root, path, ext_ref);

        /* modify cur since check_inode_item may change path */
        cur = path->nodes[0];

        /* LAST_ITEM is a control flag in err, stripped again at "out" */
        if (err & LAST_ITEM)
                goto out;

        /* still have inode items in this leaf */
        if (cur->start == cur_bytenr)
                goto again;

        /*
         * we have switched to another leaf, above nodes may
         * have changed, here walk down the path, if a node
         * or leaf is shared, check whether we can skip this
         * node or leaf.
         */
        for (i = root_level; i >= 0; i--) {
                /* Block at this level is the one already cached */
                if (path->nodes[i]->start == nrefs->bytenr[i])
                        continue;

                ret = update_nodes_refs(root,
                                path->nodes[i]->start,
                                nrefs, i);
                if (ret)
                        goto out;

                if (!nrefs->need_check[i]) {
                        *level += 1;
                        break;
                }
        }

        /* Drop the path's buffers for every level below *level */
        for (i = 0; i < *level; i++) {
                free_extent_buffer(path->nodes[i]);
                path->nodes[i] = NULL;
        }
out:
        err &= ~LAST_ITEM;
        /* A fatal (non-zero) ret wins; otherwise report the error bitmap */
        if (err && !ret)
                ret = err;
        return ret;
}
2064
2065 static void reada_walk_down(struct btrfs_root *root,
2066                             struct extent_buffer *node, int slot)
2067 {
2068         struct btrfs_fs_info *fs_info = root->fs_info;
2069         u64 bytenr;
2070         u64 ptr_gen;
2071         u32 nritems;
2072         int i;
2073         int level;
2074
2075         level = btrfs_header_level(node);
2076         if (level != 1)
2077                 return;
2078
2079         nritems = btrfs_header_nritems(node);
2080         for (i = slot; i < nritems; i++) {
2081                 bytenr = btrfs_node_blockptr(node, i);
2082                 ptr_gen = btrfs_node_ptr_generation(node, i);
2083                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2084         }
2085 }
2086
2087 /*
2088  * Check the child node/leaf by the following condition:
2089  * 1. the first item key of the node/leaf should be the same with the one
2090  *    in parent.
2091  * 2. block in parent node should match the child node/leaf.
2092  * 3. generation of parent node and child's header should be consistent.
2093  *
2094  * Or the child node/leaf pointed by the key in parent is not valid.
2095  *
2096  * We hope to check leaf owner too, but since subvol may share leaves,
2097  * which makes leaf owner check not so strong, key check should be
2098  * sufficient enough for that case.
2099  */
2100 static int check_child_node(struct extent_buffer *parent, int slot,
2101                             struct extent_buffer *child)
2102 {
2103         struct btrfs_key parent_key;
2104         struct btrfs_key child_key;
2105         int ret = 0;
2106
2107         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2108         if (btrfs_header_level(child) == 0)
2109                 btrfs_item_key_to_cpu(child, &child_key, 0);
2110         else
2111                 btrfs_node_key_to_cpu(child, &child_key, 0);
2112
2113         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2114                 ret = -EINVAL;
2115                 fprintf(stderr,
2116                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2117                         parent_key.objectid, parent_key.type, parent_key.offset,
2118                         child_key.objectid, child_key.type, child_key.offset);
2119         }
2120         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2121                 ret = -EINVAL;
2122                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2123                         btrfs_node_blockptr(parent, slot),
2124                         btrfs_header_bytenr(child));
2125         }
2126         if (btrfs_node_ptr_generation(parent, slot) !=
2127             btrfs_header_generation(child)) {
2128                 ret = -EINVAL;
2129                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2130                         btrfs_header_generation(child),
2131                         btrfs_node_ptr_generation(parent, slot));
2132         }
2133         return ret;
2134 }
2135
2136 /*
2137  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2138  * in every fs or file tree check. Here we find its all root ids, and only check
2139  * it in the fs or file tree which has the smallest root id.
2140  */
2141 static int need_check(struct btrfs_root *root, struct ulist *roots)
2142 {
2143         struct rb_node *node;
2144         struct ulist_node *u;
2145
2146         if (roots->nnodes == 1)
2147                 return 1;
2148
2149         node = rb_first(&roots->root);
2150         u = rb_entry(node, struct ulist_node, rb_node);
2151         /*
2152          * current root id is not smallest, we skip it and let it be checked
2153          * in the fs or file tree who hash the smallest root id.
2154          */
2155         if (root->objectid != u->val)
2156                 return 0;
2157
2158         return 1;
2159 }
2160
2161 /*
2162  * for a tree node or leaf, we record its reference count, so later if we still
2163  * process this node or leaf, don't need to compute its reference count again.
2164  */
2165 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2166                              struct node_refs *nrefs, u64 level)
2167 {
2168         int check, ret;
2169         u64 refs;
2170         struct ulist *roots;
2171
2172         if (nrefs->bytenr[level] != bytenr) {
2173                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2174                                        level, 1, &refs, NULL);
2175                 if (ret < 0)
2176                         return ret;
2177
2178                 nrefs->bytenr[level] = bytenr;
2179                 nrefs->refs[level] = refs;
2180                 if (refs > 1) {
2181                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2182                                                    0, &roots);
2183                         if (ret)
2184                                 return -EIO;
2185
2186                         check = need_check(root, roots);
2187                         ulist_free(roots);
2188                         nrefs->need_check[level] = check;
2189                 } else {
2190                         nrefs->need_check[level] = 1;
2191                 }
2192         }
2193
2194         return 0;
2195 }
2196
/*
 * Descend from path->nodes[*level] towards a leaf, validating each child
 * block on the way, and process the leaf that is reached.
 *
 * @root:  root being walked
 * @path:  current position; nodes/slots below *level are filled in here
 * @wc:    walk control passed to enter_shared_node()/process_one_leaf()
 * @level: in/out current level, decremented for every step down
 * @nrefs: per-level cache of block bytenr and reference count
 *
 * Returns 0 on success, a negative error on failure, or the positive value
 * returned by enter_shared_node() for the starting block.  On return the
 * slot at *level is set to the item count of that node.
 */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
                          struct walk_control *wc, int *level,
                          struct node_refs *nrefs)
{
        enum btrfs_tree_block_status status;
        u64 bytenr;
        u64 ptr_gen;
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_buffer *next;
        struct extent_buffer *cur;
        int ret, err = 0;
        u64 refs;

        WARN_ON(*level < 0);
        WARN_ON(*level >= BTRFS_MAX_LEVEL);

        /* Reference count of the starting block: cached or looked up */
        if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
                refs = nrefs->refs[*level];
                ret = 0;
        } else {
                ret = btrfs_lookup_extent_info(NULL, root,
                                       path->nodes[*level]->start,
                                       *level, 1, &refs, NULL);
                if (ret < 0) {
                        err = ret;
                        goto out;
                }
                nrefs->bytenr[*level] = path->nodes[*level]->start;
                nrefs->refs[*level] = refs;
        }

        if (refs > 1) {
                ret = enter_shared_node(root, path->nodes[*level]->start,
                                        refs, wc, *level);
                /*
                 * NOTE(review): a positive return presumably means the
                 * shared block needs no further walking - confirm against
                 * enter_shared_node().
                 */
                if (ret > 0) {
                        err = ret;
                        goto out;
                }
        }

        while (*level >= 0) {
                WARN_ON(*level < 0);
                WARN_ON(*level >= BTRFS_MAX_LEVEL);
                cur = path->nodes[*level];

                if (btrfs_header_level(cur) != *level)
                        WARN_ON(1);

                /* All slots of this node have been visited */
                if (path->slots[*level] >= btrfs_header_nritems(cur))
                        break;
                if (*level == 0) {
                        ret = process_one_leaf(root, cur, wc);
                        if (ret < 0)
                                err = ret;
                        break;
                }
                bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
                ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);

                /* Reference count of the child: cached or looked up */
                if (bytenr == nrefs->bytenr[*level - 1]) {
                        refs = nrefs->refs[*level - 1];
                } else {
                        ret = btrfs_lookup_extent_info(NULL, root, bytenr,
                                        *level - 1, 1, &refs, NULL);
                        if (ret < 0) {
                                /* Lookup failed: treat the child as unshared */
                                refs = 0;
                        } else {
                                nrefs->bytenr[*level - 1] = bytenr;
                                nrefs->refs[*level - 1] = refs;
                        }
                }

                if (refs > 1) {
                        ret = enter_shared_node(root, bytenr, refs,
                                                wc, *level - 1);
                        /* Positive return: skip this child, try the next slot */
                        if (ret > 0) {
                                path->slots[*level]++;
                                continue;
                        }
                }

                /* Use the cached block if it is up to date, else read it */
                next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
                if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
                        free_extent_buffer(next);
                        reada_walk_down(root, cur, path->slots[*level]);
                        next = read_tree_block(root->fs_info, bytenr, ptr_gen);
                        if (!extent_buffer_uptodate(next)) {
                                struct btrfs_key node_key;

                                /* Record the unreadable child as corrupt */
                                btrfs_node_key_to_cpu(path->nodes[*level],
                                                      &node_key,
                                                      path->slots[*level]);
                                btrfs_add_corrupt_extent_record(root->fs_info,
                                                &node_key,
                                                path->nodes[*level]->start,
                                                root->fs_info->nodesize,
                                                *level);
                                err = -EIO;
                                goto out;
                        }
                }

                /* Child must match what the parent pointer says about it */
                ret = check_child_node(cur, path->slots[*level], next);
                if (ret) {
                        free_extent_buffer(next);
                        err = ret;
                        goto out;
                }

                if (btrfs_is_leaf(next))
                        status = btrfs_check_leaf(root, NULL, next);
                else
                        status = btrfs_check_node(root, NULL, next);
                if (status != BTRFS_TREE_BLOCK_CLEAN) {
                        free_extent_buffer(next);
                        err = -EIO;
                        goto out;
                }

                /* Step down: the child replaces the path entry one level below */
                *level = *level - 1;
                free_extent_buffer(path->nodes[*level]);
                path->nodes[*level] = next;
                path->slots[*level] = 0;
        }
out:
        /* Mark the node at *level as fully consumed */
        path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
        return err;
}
2325
/*
 * Prototype of check_inode_item(); it repeats the declaration that already
 * appears ahead of process_one_leaf_v2() earlier in this file.
 */
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
2328
2329 /*
2330  * Returns >0  Found error, should continue
2331  * Returns <0  Fatal error, must exit the whole check
2332  * Returns 0   No errors found
2333  */
2334 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2335                              int *level, struct node_refs *nrefs, int ext_ref)
2336 {
2337         enum btrfs_tree_block_status status;
2338         u64 bytenr;
2339         u64 ptr_gen;
2340         struct btrfs_fs_info *fs_info = root->fs_info;
2341         struct extent_buffer *next;
2342         struct extent_buffer *cur;
2343         int ret;
2344
2345         WARN_ON(*level < 0);
2346         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2347
2348         ret = update_nodes_refs(root, path->nodes[*level]->start,
2349                                 nrefs, *level);
2350         if (ret < 0)
2351                 return ret;
2352
2353         while (*level >= 0) {
2354                 WARN_ON(*level < 0);
2355                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2356                 cur = path->nodes[*level];
2357
2358                 if (btrfs_header_level(cur) != *level)
2359                         WARN_ON(1);
2360
2361                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2362                         break;
2363                 /* Don't forgot to check leaf/node validation */
2364                 if (*level == 0) {
2365                         ret = btrfs_check_leaf(root, NULL, cur);
2366                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2367                                 ret = -EIO;
2368                                 break;
2369                         }
2370                         ret = process_one_leaf_v2(root, path, nrefs,
2371                                                   level, ext_ref);
2372                         cur = path->nodes[*level];
2373                         break;
2374                 } else {
2375                         ret = btrfs_check_node(root, NULL, cur);
2376                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2377                                 ret = -EIO;
2378                                 break;
2379                         }
2380                 }
2381                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2382                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2383
2384                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2385                 if (ret)
2386                         break;
2387                 if (!nrefs->need_check[*level - 1]) {
2388                         path->slots[*level]++;
2389                         continue;
2390                 }
2391
2392                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2393                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2394                         free_extent_buffer(next);
2395                         reada_walk_down(root, cur, path->slots[*level]);
2396                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2397                         if (!extent_buffer_uptodate(next)) {
2398                                 struct btrfs_key node_key;
2399
2400                                 btrfs_node_key_to_cpu(path->nodes[*level],
2401                                                       &node_key,
2402                                                       path->slots[*level]);
2403                                 btrfs_add_corrupt_extent_record(fs_info,
2404                                                 &node_key,
2405                                                 path->nodes[*level]->start,
2406                                                 fs_info->nodesize,
2407                                                 *level);
2408                                 ret = -EIO;
2409                                 break;
2410                         }
2411                 }
2412
2413                 ret = check_child_node(cur, path->slots[*level], next);
2414                 if (ret < 0) 
2415                         break;
2416
2417                 if (btrfs_is_leaf(next))
2418                         status = btrfs_check_leaf(root, NULL, next);
2419                 else
2420                         status = btrfs_check_node(root, NULL, next);
2421                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2422                         free_extent_buffer(next);
2423                         ret = -EIO;
2424                         break;
2425                 }
2426
2427                 *level = *level - 1;
2428                 free_extent_buffer(path->nodes[*level]);
2429                 path->nodes[*level] = next;
2430                 path->slots[*level] = 0;
2431         }
2432         return ret;
2433 }
2434
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436                         struct walk_control *wc, int *level)
2437 {
2438         int i;
2439         struct extent_buffer *leaf;
2440
2441         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2442                 leaf = path->nodes[i];
2443                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2444                         path->slots[i]++;
2445                         *level = i;
2446                         return 0;
2447                 } else {
2448                         free_extent_buffer(path->nodes[*level]);
2449                         path->nodes[*level] = NULL;
2450                         BUG_ON(*level > wc->active_node);
2451                         if (*level == wc->active_node)
2452                                 leave_shared_node(root, wc, *level);
2453                         *level = i + 1;
2454                 }
2455         }
2456         return 1;
2457 }
2458
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2460                            int *level)
2461 {
2462         int i;
2463         struct extent_buffer *leaf;
2464
2465         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2466                 leaf = path->nodes[i];
2467                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2468                         path->slots[i]++;
2469                         *level = i;
2470                         return 0;
2471                 } else {
2472                         free_extent_buffer(path->nodes[*level]);
2473                         path->nodes[*level] = NULL;
2474                         *level = i + 1;
2475                 }
2476         }
2477         return 1;
2478 }
2479
2480 static int check_root_dir(struct inode_record *rec)
2481 {
2482         struct inode_backref *backref;
2483         int ret = -1;
2484
2485         if (!rec->found_inode_item || rec->errors)
2486                 goto out;
2487         if (rec->nlink != 1 || rec->found_link != 0)
2488                 goto out;
2489         if (list_empty(&rec->backrefs))
2490                 goto out;
2491         backref = to_inode_backref(rec->backrefs.next);
2492         if (!backref->found_inode_ref)
2493                 goto out;
2494         if (backref->index != 0 || backref->namelen != 2 ||
2495             memcmp(backref->name, "..", 2))
2496                 goto out;
2497         if (backref->found_dir_index || backref->found_dir_item)
2498                 goto out;
2499         ret = 0;
2500 out:
2501         return ret;
2502 }
2503
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505                               struct btrfs_root *root, struct btrfs_path *path,
2506                               struct inode_record *rec)
2507 {
2508         struct btrfs_inode_item *ei;
2509         struct btrfs_key key;
2510         int ret;
2511
2512         key.objectid = rec->ino;
2513         key.type = BTRFS_INODE_ITEM_KEY;
2514         key.offset = (u64)-1;
2515
2516         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2517         if (ret < 0)
2518                 goto out;
2519         if (ret) {
2520                 if (!path->slots[0]) {
2521                         ret = -ENOENT;
2522                         goto out;
2523                 }
2524                 path->slots[0]--;
2525                 ret = 0;
2526         }
2527         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2528         if (key.objectid != rec->ino) {
2529                 ret = -ENOENT;
2530                 goto out;
2531         }
2532
2533         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534                             struct btrfs_inode_item);
2535         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536         btrfs_mark_buffer_dirty(path->nodes[0]);
2537         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2539                root->root_key.objectid);
2540 out:
2541         btrfs_release_path(path);
2542         return ret;
2543 }
2544
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546                                     struct btrfs_root *root,
2547                                     struct btrfs_path *path,
2548                                     struct inode_record *rec)
2549 {
2550         int ret;
2551
2552         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553         btrfs_release_path(path);
2554         if (!ret)
2555                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2556         return ret;
2557 }
2558
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560                                struct btrfs_root *root,
2561                                struct btrfs_path *path,
2562                                struct inode_record *rec)
2563 {
2564         struct btrfs_inode_item *ei;
2565         struct btrfs_key key;
2566         int ret = 0;
2567
2568         key.objectid = rec->ino;
2569         key.type = BTRFS_INODE_ITEM_KEY;
2570         key.offset = 0;
2571
2572         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2573         if (ret) {
2574                 if (ret > 0)
2575                         ret = -ENOENT;
2576                 goto out;
2577         }
2578
2579         /* Since ret == 0, no need to check anything */
2580         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581                             struct btrfs_inode_item);
2582         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583         btrfs_mark_buffer_dirty(path->nodes[0]);
2584         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585         printf("reset nbytes for ino %llu root %llu\n",
2586                rec->ino, root->root_key.objectid);
2587 out:
2588         btrfs_release_path(path);
2589         return ret;
2590 }
2591
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593                                  struct cache_tree *inode_cache,
2594                                  struct inode_record *rec,
2595                                  struct inode_backref *backref)
2596 {
2597         struct btrfs_path path;
2598         struct btrfs_trans_handle *trans;
2599         struct btrfs_dir_item *dir_item;
2600         struct extent_buffer *leaf;
2601         struct btrfs_key key;
2602         struct btrfs_disk_key disk_key;
2603         struct inode_record *dir_rec;
2604         unsigned long name_ptr;
2605         u32 data_size = sizeof(*dir_item) + backref->namelen;
2606         int ret;
2607
2608         trans = btrfs_start_transaction(root, 1);
2609         if (IS_ERR(trans))
2610                 return PTR_ERR(trans);
2611
2612         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613                 (unsigned long long)rec->ino);
2614
2615         btrfs_init_path(&path);
2616         key.objectid = backref->dir;
2617         key.type = BTRFS_DIR_INDEX_KEY;
2618         key.offset = backref->index;
2619         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2620         BUG_ON(ret);
2621
2622         leaf = path.nodes[0];
2623         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2624
2625         disk_key.objectid = cpu_to_le64(rec->ino);
2626         disk_key.type = BTRFS_INODE_ITEM_KEY;
2627         disk_key.offset = 0;
2628
2629         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631         btrfs_set_dir_data_len(leaf, dir_item, 0);
2632         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2633         name_ptr = (unsigned long)(dir_item + 1);
2634         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635         btrfs_mark_buffer_dirty(leaf);
2636         btrfs_release_path(&path);
2637         btrfs_commit_transaction(trans, root);
2638
2639         backref->found_dir_index = 1;
2640         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641         BUG_ON(IS_ERR(dir_rec));
2642         if (!dir_rec)
2643                 return 0;
2644         dir_rec->found_size += backref->namelen;
2645         if (dir_rec->found_size == dir_rec->isize &&
2646             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648         if (dir_rec->found_size != dir_rec->isize)
2649                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2650
2651         return 0;
2652 }
2653
2654 static int delete_dir_index(struct btrfs_root *root,
2655                             struct inode_backref *backref)
2656 {
2657         struct btrfs_trans_handle *trans;
2658         struct btrfs_dir_item *di;
2659         struct btrfs_path path;
2660         int ret = 0;
2661
2662         trans = btrfs_start_transaction(root, 1);
2663         if (IS_ERR(trans))
2664                 return PTR_ERR(trans);
2665
2666         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667                 (unsigned long long)backref->dir,
2668                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669                 (unsigned long long)root->objectid);
2670
2671         btrfs_init_path(&path);
2672         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673                                     backref->name, backref->namelen,
2674                                     backref->index, -1);
2675         if (IS_ERR(di)) {
2676                 ret = PTR_ERR(di);
2677                 btrfs_release_path(&path);
2678                 btrfs_commit_transaction(trans, root);
2679                 if (ret == -ENOENT)
2680                         return 0;
2681                 return ret;
2682         }
2683
2684         if (!di)
2685                 ret = btrfs_del_item(trans, root, &path);
2686         else
2687                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2688         BUG_ON(ret);
2689         btrfs_release_path(&path);
2690         btrfs_commit_transaction(trans, root);
2691         return ret;
2692 }
2693
2694 static int create_inode_item(struct btrfs_root *root,
2695                              struct inode_record *rec,
2696                              int root_dir)
2697 {
2698         struct btrfs_trans_handle *trans;
2699         struct btrfs_inode_item inode_item;
2700         time_t now = time(NULL);
2701         int ret;
2702
2703         trans = btrfs_start_transaction(root, 1);
2704         if (IS_ERR(trans)) {
2705                 ret = PTR_ERR(trans);
2706                 return ret;
2707         }
2708
2709         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2710                 "be incomplete, please check permissions and content after "
2711                 "the fsck completes.\n", (unsigned long long)root->objectid,
2712                 (unsigned long long)rec->ino);
2713
2714         memset(&inode_item, 0, sizeof(inode_item));
2715         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2716         if (root_dir)
2717                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2718         else
2719                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2720         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2721         if (rec->found_dir_item) {
2722                 if (rec->found_file_extent)
2723                         fprintf(stderr, "root %llu inode %llu has both a dir "
2724                                 "item and extents, unsure if it is a dir or a "
2725                                 "regular file so setting it as a directory\n",
2726                                 (unsigned long long)root->objectid,
2727                                 (unsigned long long)rec->ino);
2728                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2729                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2730         } else if (!rec->found_dir_item) {
2731                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2732                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2733         }
2734         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2735         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2736         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2737         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2738         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2739         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2740         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2741         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2742
2743         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2744         BUG_ON(ret);
2745         btrfs_commit_transaction(trans, root);
2746         return 0;
2747 }
2748
2749 static int repair_inode_backrefs(struct btrfs_root *root,
2750                                  struct inode_record *rec,
2751                                  struct cache_tree *inode_cache,
2752                                  int delete)
2753 {
2754         struct inode_backref *tmp, *backref;
2755         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2756         int ret = 0;
2757         int repaired = 0;
2758
2759         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2760                 if (!delete && rec->ino == root_dirid) {
2761                         if (!rec->found_inode_item) {
2762                                 ret = create_inode_item(root, rec, 1);
2763                                 if (ret)
2764                                         break;
2765                                 repaired++;
2766                         }
2767                 }
2768
2769                 /* Index 0 for root dir's are special, don't mess with it */
2770                 if (rec->ino == root_dirid && backref->index == 0)
2771                         continue;
2772
2773                 if (delete &&
2774                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2775                      (backref->found_dir_index && backref->found_inode_ref &&
2776                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2777                         ret = delete_dir_index(root, backref);
2778                         if (ret)
2779                                 break;
2780                         repaired++;
2781                         list_del(&backref->list);
2782                         free(backref);
2783                         continue;
2784                 }
2785
2786                 if (!delete && !backref->found_dir_index &&
2787                     backref->found_dir_item && backref->found_inode_ref) {
2788                         ret = add_missing_dir_index(root, inode_cache, rec,
2789                                                     backref);
2790                         if (ret)
2791                                 break;
2792                         repaired++;
2793                         if (backref->found_dir_item &&
2794                             backref->found_dir_index) {
2795                                 if (!backref->errors &&
2796                                     backref->found_inode_ref) {
2797                                         list_del(&backref->list);
2798                                         free(backref);
2799                                         continue;
2800                                 }
2801                         }
2802                 }
2803
2804                 if (!delete && (!backref->found_dir_index &&
2805                                 !backref->found_dir_item &&
2806                                 backref->found_inode_ref)) {
2807                         struct btrfs_trans_handle *trans;
2808                         struct btrfs_key location;
2809
2810                         ret = check_dir_conflict(root, backref->name,
2811                                                  backref->namelen,
2812                                                  backref->dir,
2813                                                  backref->index);
2814                         if (ret) {
2815                                 /*
2816                                  * let nlink fixing routine to handle it,
2817                                  * which can do it better.
2818                                  */
2819                                 ret = 0;
2820                                 break;
2821                         }
2822                         location.objectid = rec->ino;
2823                         location.type = BTRFS_INODE_ITEM_KEY;
2824                         location.offset = 0;
2825
2826                         trans = btrfs_start_transaction(root, 1);
2827                         if (IS_ERR(trans)) {
2828                                 ret = PTR_ERR(trans);
2829                                 break;
2830                         }
2831                         fprintf(stderr, "adding missing dir index/item pair "
2832                                 "for inode %llu\n",
2833                                 (unsigned long long)rec->ino);
2834                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2835                                                     backref->namelen,
2836                                                     backref->dir, &location,
2837                                                     imode_to_type(rec->imode),
2838                                                     backref->index);
2839                         BUG_ON(ret);
2840                         btrfs_commit_transaction(trans, root);
2841                         repaired++;
2842                 }
2843
2844                 if (!delete && (backref->found_inode_ref &&
2845                                 backref->found_dir_index &&
2846                                 backref->found_dir_item &&
2847                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2848                                 !rec->found_inode_item)) {
2849                         ret = create_inode_item(root, rec, 0);
2850                         if (ret)
2851                                 break;
2852                         repaired++;
2853                 }
2854
2855         }
2856         return ret ? ret : repaired;
2857 }
2858
2859 /*
2860  * To determine the file type for nlink/inode_item repair
2861  *
2862  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2863  * Return -ENOENT if file type is not found.
2864  */
2865 static int find_file_type(struct inode_record *rec, u8 *type)
2866 {
2867         struct inode_backref *backref;
2868
2869         /* For inode item recovered case */
2870         if (rec->found_inode_item) {
2871                 *type = imode_to_type(rec->imode);
2872                 return 0;
2873         }
2874
2875         list_for_each_entry(backref, &rec->backrefs, list) {
2876                 if (backref->found_dir_index || backref->found_dir_item) {
2877                         *type = backref->filetype;
2878                         return 0;
2879                 }
2880         }
2881         return -ENOENT;
2882 }
2883
2884 /*
2885  * To determine the file name for nlink repair
2886  *
2887  * Return 0 if file name is found, set name and namelen.
2888  * Return -ENOENT if file name is not found.
2889  */
2890 static int find_file_name(struct inode_record *rec,
2891                           char *name, int *namelen)
2892 {
2893         struct inode_backref *backref;
2894
2895         list_for_each_entry(backref, &rec->backrefs, list) {
2896                 if (backref->found_dir_index || backref->found_dir_item ||
2897                     backref->found_inode_ref) {
2898                         memcpy(name, backref->name, backref->namelen);
2899                         *namelen = backref->namelen;
2900                         return 0;
2901                 }
2902         }
2903         return -ENOENT;
2904 }
2905
2906 /* Reset the nlink of the inode to the correct one */
2907 static int reset_nlink(struct btrfs_trans_handle *trans,
2908                        struct btrfs_root *root,
2909                        struct btrfs_path *path,
2910                        struct inode_record *rec)
2911 {
2912         struct inode_backref *backref;
2913         struct inode_backref *tmp;
2914         struct btrfs_key key;
2915         struct btrfs_inode_item *inode_item;
2916         int ret = 0;
2917
2918         /* We don't believe this either, reset it and iterate backref */
2919         rec->found_link = 0;
2920
2921         /* Remove all backref including the valid ones */
2922         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2923                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2924                                    backref->index, backref->name,
2925                                    backref->namelen, 0);
2926                 if (ret < 0)
2927                         goto out;
2928
2929                 /* remove invalid backref, so it won't be added back */
2930                 if (!(backref->found_dir_index &&
2931                       backref->found_dir_item &&
2932                       backref->found_inode_ref)) {
2933                         list_del(&backref->list);
2934                         free(backref);
2935                 } else {
2936                         rec->found_link++;
2937                 }
2938         }
2939
2940         /* Set nlink to 0 */
2941         key.objectid = rec->ino;
2942         key.type = BTRFS_INODE_ITEM_KEY;
2943         key.offset = 0;
2944         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2945         if (ret < 0)
2946                 goto out;
2947         if (ret > 0) {
2948                 ret = -ENOENT;
2949                 goto out;
2950         }
2951         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2952                                     struct btrfs_inode_item);
2953         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2954         btrfs_mark_buffer_dirty(path->nodes[0]);
2955         btrfs_release_path(path);
2956
2957         /*
2958          * Add back valid inode_ref/dir_item/dir_index,
2959          * add_link() will handle the nlink inc, so new nlink must be correct
2960          */
2961         list_for_each_entry(backref, &rec->backrefs, list) {
2962                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2963                                      backref->name, backref->namelen,
2964                                      backref->filetype, &backref->index, 1);
2965                 if (ret < 0)
2966                         goto out;
2967         }
2968 out:
2969         btrfs_release_path(path);
2970         return ret;
2971 }
2972
2973 static int get_highest_inode(struct btrfs_trans_handle *trans,
2974                                 struct btrfs_root *root,
2975                                 struct btrfs_path *path,
2976                                 u64 *highest_ino)
2977 {
2978         struct btrfs_key key, found_key;
2979         int ret;
2980
2981         btrfs_init_path(path);
2982         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2983         key.offset = -1;
2984         key.type = BTRFS_INODE_ITEM_KEY;
2985         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2986         if (ret == 1) {
2987                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2988                                 path->slots[0] - 1);
2989                 *highest_ino = found_key.objectid;
2990                 ret = 0;
2991         }
2992         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2993                 ret = -EOVERFLOW;
2994         btrfs_release_path(path);
2995         return ret;
2996 }
2997
2998 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2999                                struct btrfs_root *root,
3000                                struct btrfs_path *path,
3001                                struct inode_record *rec)
3002 {
3003         char *dir_name = "lost+found";
3004         char namebuf[BTRFS_NAME_LEN] = {0};
3005         u64 lost_found_ino;
3006         u32 mode = 0700;
3007         u8 type = 0;
3008         int namelen = 0;
3009         int name_recovered = 0;
3010         int type_recovered = 0;
3011         int ret = 0;
3012
3013         /*
3014          * Get file name and type first before these invalid inode ref
3015          * are deleted by remove_all_invalid_backref()
3016          */
3017         name_recovered = !find_file_name(rec, namebuf, &namelen);
3018         type_recovered = !find_file_type(rec, &type);
3019
3020         if (!name_recovered) {
3021                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3022                        rec->ino, rec->ino);
3023                 namelen = count_digits(rec->ino);
3024                 sprintf(namebuf, "%llu", rec->ino);
3025                 name_recovered = 1;
3026         }
3027         if (!type_recovered) {
3028                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3029                        rec->ino);
3030                 type = BTRFS_FT_REG_FILE;
3031                 type_recovered = 1;
3032         }
3033
3034         ret = reset_nlink(trans, root, path, rec);
3035         if (ret < 0) {
3036                 fprintf(stderr,
3037                         "Failed to reset nlink for inode %llu: %s\n",
3038                         rec->ino, strerror(-ret));
3039                 goto out;
3040         }
3041
3042         if (rec->found_link == 0) {
3043                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3044                 if (ret < 0)
3045                         goto out;
3046                 lost_found_ino++;
3047                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3048                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3049                                   mode);
3050                 if (ret < 0) {
3051                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
3052                                 dir_name, strerror(-ret));
3053                         goto out;
3054                 }
3055                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3056                                      namebuf, namelen, type, NULL, 1);
3057                 /*
3058                  * Add ".INO" suffix several times to handle case where
3059                  * "FILENAME.INO" is already taken by another file.
3060                  */
3061                 while (ret == -EEXIST) {
3062                         /*
3063                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
3064                          */
3065                         if (namelen + count_digits(rec->ino) + 1 >
3066                             BTRFS_NAME_LEN) {
3067                                 ret = -EFBIG;
3068                                 goto out;
3069                         }
3070                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3071                                  ".%llu", rec->ino);
3072                         namelen += count_digits(rec->ino) + 1;
3073                         ret = btrfs_add_link(trans, root, rec->ino,
3074                                              lost_found_ino, namebuf,
3075                                              namelen, type, NULL, 1);
3076                 }
3077                 if (ret < 0) {
3078                         fprintf(stderr,
3079                                 "Failed to link the inode %llu to %s dir: %s\n",
3080                                 rec->ino, dir_name, strerror(-ret));
3081                         goto out;
3082                 }
3083                 /*
3084                  * Just increase the found_link, don't actually add the
3085                  * backref. This will make things easier and this inode
3086                  * record will be freed after the repair is done.
3087                  * So fsck will not report problem about this inode.
3088                  */
3089                 rec->found_link++;
3090                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3091                        namelen, namebuf, dir_name);
3092         }
3093         printf("Fixed the nlink of inode %llu\n", rec->ino);
3094 out:
3095         /*
3096          * Clear the flag anyway, or we will loop forever for the same inode
3097          * as it will not be removed from the bad inode list and the dead loop
3098          * happens.
3099          */
3100         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3101         btrfs_release_path(path);
3102         return ret;
3103 }
3104
3105 /*
3106  * Check if there is any normal(reg or prealloc) file extent for given
3107  * ino.
3108  * This is used to determine the file type when neither its dir_index/item or
3109  * inode_item exists.
3110  *
3111  * This will *NOT* report error, if any error happens, just consider it does
3112  * not have any normal file extent.
3113  */
3114 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3115 {
3116         struct btrfs_path path;
3117         struct btrfs_key key;
3118         struct btrfs_key found_key;
3119         struct btrfs_file_extent_item *fi;
3120         u8 type;
3121         int ret = 0;
3122
3123         btrfs_init_path(&path);
3124         key.objectid = ino;
3125         key.type = BTRFS_EXTENT_DATA_KEY;
3126         key.offset = 0;
3127
3128         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3129         if (ret < 0) {
3130                 ret = 0;
3131                 goto out;
3132         }
3133         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3134                 ret = btrfs_next_leaf(root, &path);
3135                 if (ret) {
3136                         ret = 0;
3137                         goto out;
3138                 }
3139         }
3140         while (1) {
3141                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3142                                       path.slots[0]);
3143                 if (found_key.objectid != ino ||
3144                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3145                         break;
3146                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3147                                     struct btrfs_file_extent_item);
3148                 type = btrfs_file_extent_type(path.nodes[0], fi);
3149                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3150                         ret = 1;
3151                         goto out;
3152                 }
3153         }
3154 out:
3155         btrfs_release_path(&path);
3156         return ret;
3157 }
3158
3159 static u32 btrfs_type_to_imode(u8 type)
3160 {
3161         static u32 imode_by_btrfs_type[] = {
3162                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3163                 [BTRFS_FT_DIR]          = S_IFDIR,
3164                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3165                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3166                 [BTRFS_FT_FIFO]         = S_IFIFO,
3167                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3168                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3169         };
3170
3171         return imode_by_btrfs_type[(type)];
3172 }
3173
3174 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3175                                 struct btrfs_root *root,
3176                                 struct btrfs_path *path,
3177                                 struct inode_record *rec)
3178 {
3179         u8 filetype;
3180         u32 mode = 0700;
3181         int type_recovered = 0;
3182         int ret = 0;
3183
3184         printf("Trying to rebuild inode:%llu\n", rec->ino);
3185
3186         type_recovered = !find_file_type(rec, &filetype);
3187
3188         /*
3189          * Try to determine inode type if type not found.
3190          *
3191          * For found regular file extent, it must be FILE.
3192          * For found dir_item/index, it must be DIR.
3193          *
3194          * For undetermined one, use FILE as fallback.
3195          *
3196          * TODO:
3197          * 1. If found backref(inode_index/item is already handled) to it,
3198          *    it must be DIR.
3199          *    Need new inode-inode ref structure to allow search for that.
3200          */
3201         if (!type_recovered) {
3202                 if (rec->found_file_extent &&
3203                     find_normal_file_extent(root, rec->ino)) {
3204                         type_recovered = 1;
3205                         filetype = BTRFS_FT_REG_FILE;
3206                 } else if (rec->found_dir_item) {
3207                         type_recovered = 1;
3208                         filetype = BTRFS_FT_DIR;
3209                 } else if (!list_empty(&rec->orphan_extents)) {
3210                         type_recovered = 1;
3211                         filetype = BTRFS_FT_REG_FILE;
3212                 } else{
3213                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3214                                rec->ino);
3215                         type_recovered = 1;
3216                         filetype = BTRFS_FT_REG_FILE;
3217                 }
3218         }
3219
3220         ret = btrfs_new_inode(trans, root, rec->ino,
3221                               mode | btrfs_type_to_imode(filetype));
3222         if (ret < 0)
3223                 goto out;
3224
3225         /*
3226          * Here inode rebuild is done, we only rebuild the inode item,
3227          * don't repair the nlink(like move to lost+found).
3228          * That is the job of nlink repair.
3229          *
3230          * We just fill the record and return
3231          */
3232         rec->found_dir_item = 1;
3233         rec->imode = mode | btrfs_type_to_imode(filetype);
3234         rec->nlink = 0;
3235         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3236         /* Ensure the inode_nlinks repair function will be called */
3237         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3238 out:
3239         return ret;
3240 }
3241
3242 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3243                                       struct btrfs_root *root,
3244                                       struct btrfs_path *path,
3245                                       struct inode_record *rec)
3246 {
3247         struct orphan_data_extent *orphan;
3248         struct orphan_data_extent *tmp;
3249         int ret = 0;
3250
3251         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3252                 /*
3253                  * Check for conflicting file extents
3254                  *
3255                  * Here we don't know whether the extents is compressed or not,
3256                  * so we can only assume it not compressed nor data offset,
3257                  * and use its disk_len as extent length.
3258                  */
3259                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3260                                        orphan->offset, orphan->disk_len, 0);
3261                 btrfs_release_path(path);
3262                 if (ret < 0)
3263                         goto out;
3264                 if (!ret) {
3265                         fprintf(stderr,
3266                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3267                                 orphan->disk_bytenr, orphan->disk_len);
3268                         ret = btrfs_free_extent(trans,
3269                                         root->fs_info->extent_root,
3270                                         orphan->disk_bytenr, orphan->disk_len,
3271                                         0, root->objectid, orphan->objectid,
3272                                         orphan->offset);
3273                         if (ret < 0)
3274                                 goto out;
3275                 }
3276                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3277                                 orphan->offset, orphan->disk_bytenr,
3278                                 orphan->disk_len, orphan->disk_len);
3279                 if (ret < 0)
3280                         goto out;
3281
3282                 /* Update file size info */
3283                 rec->found_size += orphan->disk_len;
3284                 if (rec->found_size == rec->nbytes)
3285                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3286
3287                 /* Update the file extent hole info too */
3288                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3289                                            orphan->disk_len);
3290                 if (ret < 0)
3291                         goto out;
3292                 if (RB_EMPTY_ROOT(&rec->holes))
3293                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3294
3295                 list_del(&orphan->list);
3296                 free(orphan);
3297         }
3298         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3299 out:
3300         return ret;
3301 }
3302
3303 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3304                                         struct btrfs_root *root,
3305                                         struct btrfs_path *path,
3306                                         struct inode_record *rec)
3307 {
3308         struct rb_node *node;
3309         struct file_extent_hole *hole;
3310         int found = 0;
3311         int ret = 0;
3312
3313         node = rb_first(&rec->holes);
3314
3315         while (node) {
3316                 found = 1;
3317                 hole = rb_entry(node, struct file_extent_hole, node);
3318                 ret = btrfs_punch_hole(trans, root, rec->ino,
3319                                        hole->start, hole->len);
3320                 if (ret < 0)
3321                         goto out;
3322                 ret = del_file_extent_hole(&rec->holes, hole->start,
3323                                            hole->len);
3324                 if (ret < 0)
3325                         goto out;
3326                 if (RB_EMPTY_ROOT(&rec->holes))
3327                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3328                 node = rb_first(&rec->holes);
3329         }
3330         /* special case for a file losing all its file extent */
3331         if (!found) {
3332                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3333                                        round_up(rec->isize,
3334                                                 root->fs_info->sectorsize));
3335                 if (ret < 0)
3336                         goto out;
3337         }
3338         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3339                rec->ino, root->objectid);
3340 out:
3341         return ret;
3342 }
3343
3344 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3345 {
3346         struct btrfs_trans_handle *trans;
3347         struct btrfs_path path;
3348         int ret = 0;
3349
3350         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3351                              I_ERR_NO_ORPHAN_ITEM |
3352                              I_ERR_LINK_COUNT_WRONG |
3353                              I_ERR_NO_INODE_ITEM |
3354                              I_ERR_FILE_EXTENT_ORPHAN |
3355                              I_ERR_FILE_EXTENT_DISCOUNT|
3356                              I_ERR_FILE_NBYTES_WRONG)))
3357                 return rec->errors;
3358
3359         /*
3360          * For nlink repair, it may create a dir and add link, so
3361          * 2 for parent(256)'s dir_index and dir_item
3362          * 2 for lost+found dir's inode_item and inode_ref
3363          * 1 for the new inode_ref of the file
3364          * 2 for lost+found dir's dir_index and dir_item for the file
3365          */
3366         trans = btrfs_start_transaction(root, 7);
3367         if (IS_ERR(trans))
3368                 return PTR_ERR(trans);
3369
3370         btrfs_init_path(&path);
3371         if (rec->errors & I_ERR_NO_INODE_ITEM)
3372                 ret = repair_inode_no_item(trans, root, &path, rec);
3373         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3374                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3375         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3376                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3377         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3378                 ret = repair_inode_isize(trans, root, &path, rec);
3379         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3380                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3381         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3382                 ret = repair_inode_nlinks(trans, root, &path, rec);
3383         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3384                 ret = repair_inode_nbytes(trans, root, &path, rec);
3385         btrfs_commit_transaction(trans, root);
3386         btrfs_release_path(&path);
3387         return ret;
3388 }
3389
3390 static int check_inode_recs(struct btrfs_root *root,
3391                             struct cache_tree *inode_cache)
3392 {
3393         struct cache_extent *cache;
3394         struct ptr_node *node;
3395         struct inode_record *rec;
3396         struct inode_backref *backref;
3397         int stage = 0;
3398         int ret = 0;
3399         int err = 0;
3400         u64 error = 0;
3401         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3402
3403         if (btrfs_root_refs(&root->root_item) == 0) {
3404                 if (!cache_tree_empty(inode_cache))
3405                         fprintf(stderr, "warning line %d\n", __LINE__);
3406                 return 0;
3407         }
3408
3409         /*
3410          * We need to repair backrefs first because we could change some of the
3411          * errors in the inode recs.
3412          *
3413          * We also need to go through and delete invalid backrefs first and then
3414          * add the correct ones second.  We do this because we may get EEXIST
3415          * when adding back the correct index because we hadn't yet deleted the
3416          * invalid index.
3417          *
3418          * For example, if we were missing a dir index then the directories
3419          * isize would be wrong, so if we fixed the isize to what we thought it
3420          * would be and then fixed the backref we'd still have a invalid fs, so
3421          * we need to add back the dir index and then check to see if the isize
3422          * is still wrong.
3423          */
3424         while (stage < 3) {
3425                 stage++;
3426                 if (stage == 3 && !err)
3427                         break;
3428
3429                 cache = search_cache_extent(inode_cache, 0);
3430                 while (repair && cache) {
3431                         node = container_of(cache, struct ptr_node, cache);
3432                         rec = node->data;
3433                         cache = next_cache_extent(cache);
3434
3435                         /* Need to free everything up and rescan */
3436                         if (stage == 3) {
3437                                 remove_cache_extent(inode_cache, &node->cache);
3438                                 free(node);
3439                                 free_inode_rec(rec);
3440                                 continue;
3441                         }
3442
3443                         if (list_empty(&rec->backrefs))
3444                                 continue;
3445
3446                         ret = repair_inode_backrefs(root, rec, inode_cache,
3447                                                     stage == 1);
3448                         if (ret < 0) {
3449                                 err = ret;
3450                                 stage = 2;
3451                                 break;
3452                         } if (ret > 0) {
3453                                 err = -EAGAIN;
3454                         }
3455                 }
3456         }
3457         if (err)
3458                 return err;
3459
3460         rec = get_inode_rec(inode_cache, root_dirid, 0);
3461         BUG_ON(IS_ERR(rec));
3462         if (rec) {
3463                 ret = check_root_dir(rec);
3464                 if (ret) {
3465                         fprintf(stderr, "root %llu root dir %llu error\n",
3466                                 (unsigned long long)root->root_key.objectid,
3467                                 (unsigned long long)root_dirid);
3468                         print_inode_error(root, rec);
3469                         error++;
3470                 }
3471         } else {
3472                 if (repair) {
3473                         struct btrfs_trans_handle *trans;
3474
3475                         trans = btrfs_start_transaction(root, 1);
3476                         if (IS_ERR(trans)) {
3477                                 err = PTR_ERR(trans);
3478                                 return err;
3479                         }
3480
3481                         fprintf(stderr,
3482                                 "root %llu missing its root dir, recreating\n",
3483                                 (unsigned long long)root->objectid);
3484
3485                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3486                         BUG_ON(ret);
3487
3488                         btrfs_commit_transaction(trans, root);
3489                         return -EAGAIN;
3490                 }
3491
3492                 fprintf(stderr, "root %llu root dir %llu not found\n",
3493                         (unsigned long long)root->root_key.objectid,
3494                         (unsigned long long)root_dirid);
3495         }
3496
3497         while (1) {
3498                 cache = search_cache_extent(inode_cache, 0);
3499                 if (!cache)
3500                         break;
3501                 node = container_of(cache, struct ptr_node, cache);
3502                 rec = node->data;
3503                 remove_cache_extent(inode_cache, &node->cache);
3504                 free(node);
3505                 if (rec->ino == root_dirid ||
3506                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3507                         free_inode_rec(rec);
3508                         continue;
3509                 }
3510
3511                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3512                         ret = check_orphan_item(root, rec->ino);
3513                         if (ret == 0)
3514                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3515                         if (can_free_inode_rec(rec)) {
3516                                 free_inode_rec(rec);
3517                                 continue;
3518                         }
3519                 }
3520
3521                 if (!rec->found_inode_item)
3522                         rec->errors |= I_ERR_NO_INODE_ITEM;
3523                 if (rec->found_link != rec->nlink)
3524                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3525                 if (repair) {
3526                         ret = try_repair_inode(root, rec);
3527                         if (ret == 0 && can_free_inode_rec(rec)) {
3528                                 free_inode_rec(rec);
3529                                 continue;
3530                         }
3531                         ret = 0;
3532                 }
3533
3534                 if (!(repair && ret == 0))
3535                         error++;
3536                 print_inode_error(root, rec);
3537                 list_for_each_entry(backref, &rec->backrefs, list) {
3538                         if (!backref->found_dir_item)
3539                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3540                         if (!backref->found_dir_index)
3541                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3542                         if (!backref->found_inode_ref)
3543                                 backref->errors |= REF_ERR_NO_INODE_REF;
3544                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3545                                 " namelen %u name %s filetype %d errors %x",
3546                                 (unsigned long long)backref->dir,
3547                                 (unsigned long long)backref->index,
3548                                 backref->namelen, backref->name,
3549                                 backref->filetype, backref->errors);
3550                         print_ref_error(backref->errors);
3551                 }
3552                 free_inode_rec(rec);
3553         }
3554         return (error > 0) ? -1 : 0;
3555 }
3556
3557 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3558                                         u64 objectid)
3559 {
3560         struct cache_extent *cache;
3561         struct root_record *rec = NULL;
3562         int ret;
3563
3564         cache = lookup_cache_extent(root_cache, objectid, 1);
3565         if (cache) {
3566                 rec = container_of(cache, struct root_record, cache);
3567         } else {
3568                 rec = calloc(1, sizeof(*rec));
3569                 if (!rec)
3570                         return ERR_PTR(-ENOMEM);
3571                 rec->objectid = objectid;
3572                 INIT_LIST_HEAD(&rec->backrefs);
3573                 rec->cache.start = objectid;
3574                 rec->cache.size = 1;
3575
3576                 ret = insert_cache_extent(root_cache, &rec->cache);
3577                 if (ret)
3578                         return ERR_PTR(-EEXIST);
3579         }
3580         return rec;
3581 }
3582
3583 static struct root_backref *get_root_backref(struct root_record *rec,
3584                                              u64 ref_root, u64 dir, u64 index,
3585                                              const char *name, int namelen)
3586 {
3587         struct root_backref *backref;
3588
3589         list_for_each_entry(backref, &rec->backrefs, list) {
3590                 if (backref->ref_root != ref_root || backref->dir != dir ||
3591                     backref->namelen != namelen)
3592                         continue;
3593                 if (memcmp(name, backref->name, namelen))
3594                         continue;
3595                 return backref;
3596         }
3597
3598         backref = calloc(1, sizeof(*backref) + namelen + 1);
3599         if (!backref)
3600                 return NULL;
3601         backref->ref_root = ref_root;
3602         backref->dir = dir;
3603         backref->index = index;
3604         backref->namelen = namelen;
3605         memcpy(backref->name, name, namelen);
3606         backref->name[namelen] = '\0';
3607         list_add_tail(&backref->list, &rec->backrefs);
3608         return backref;
3609 }
3610
3611 static void free_root_record(struct cache_extent *cache)
3612 {
3613         struct root_record *rec;
3614         struct root_backref *backref;
3615
3616         rec = container_of(cache, struct root_record, cache);
3617         while (!list_empty(&rec->backrefs)) {
3618                 backref = to_root_backref(rec->backrefs.next);
3619                 list_del(&backref->list);
3620                 free(backref);
3621         }
3622
3623         free(rec);
3624 }
3625
3626 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3627
3628 static int add_root_backref(struct cache_tree *root_cache,
3629                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3630                             const char *name, int namelen,
3631                             int item_type, int errors)
3632 {
3633         struct root_record *rec;
3634         struct root_backref *backref;
3635
3636         rec = get_root_rec(root_cache, root_id);
3637         BUG_ON(IS_ERR(rec));
3638         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3639         BUG_ON(!backref);
3640
3641         backref->errors |= errors;
3642
3643         if (item_type != BTRFS_DIR_ITEM_KEY) {
3644                 if (backref->found_dir_index || backref->found_back_ref ||
3645                     backref->found_forward_ref) {
3646                         if (backref->index != index)
3647                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3648                 } else {
3649                         backref->index = index;
3650                 }
3651         }
3652
3653         if (item_type == BTRFS_DIR_ITEM_KEY) {
3654                 if (backref->found_forward_ref)
3655                         rec->found_ref++;
3656                 backref->found_dir_item = 1;
3657         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3658                 backref->found_dir_index = 1;
3659         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3660                 if (backref->found_forward_ref)
3661                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3662                 else if (backref->found_dir_item)
3663                         rec->found_ref++;
3664                 backref->found_forward_ref = 1;
3665         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3666                 if (backref->found_back_ref)
3667                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3668                 backref->found_back_ref = 1;
3669         } else {
3670                 BUG_ON(1);
3671         }
3672
3673         if (backref->found_forward_ref && backref->found_dir_item)
3674                 backref->reachable = 1;
3675         return 0;
3676 }
3677
3678 static int merge_root_recs(struct btrfs_root *root,
3679                            struct cache_tree *src_cache,
3680                            struct cache_tree *dst_cache)
3681 {
3682         struct cache_extent *cache;
3683         struct ptr_node *node;
3684         struct inode_record *rec;
3685         struct inode_backref *backref;
3686         int ret = 0;
3687
3688         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3689                 free_inode_recs_tree(src_cache);
3690                 return 0;
3691         }
3692
3693         while (1) {
3694                 cache = search_cache_extent(src_cache, 0);
3695                 if (!cache)
3696                         break;
3697                 node = container_of(cache, struct ptr_node, cache);
3698                 rec = node->data;
3699                 remove_cache_extent(src_cache, &node->cache);
3700                 free(node);
3701
3702                 ret = is_child_root(root, root->objectid, rec->ino);
3703                 if (ret < 0)
3704                         break;
3705                 else if (ret == 0)
3706                         goto skip;
3707
3708                 list_for_each_entry(backref, &rec->backrefs, list) {
3709                         BUG_ON(backref->found_inode_ref);
3710                         if (backref->found_dir_item)
3711                                 add_root_backref(dst_cache, rec->ino,
3712                                         root->root_key.objectid, backref->dir,
3713                                         backref->index, backref->name,
3714                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3715                                         backref->errors);
3716                         if (backref->found_dir_index)
3717                                 add_root_backref(dst_cache, rec->ino,
3718                                         root->root_key.objectid, backref->dir,
3719                                         backref->index, backref->name,
3720                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3721                                         backref->errors);
3722                 }
3723 skip:
3724                 free_inode_rec(rec);
3725         }
3726         if (ret < 0)
3727                 return ret;
3728         return 0;
3729 }
3730
3731 static int check_root_refs(struct btrfs_root *root,
3732                            struct cache_tree *root_cache)
3733 {
3734         struct root_record *rec;
3735         struct root_record *ref_root;
3736         struct root_backref *backref;
3737         struct cache_extent *cache;
3738         int loop = 1;
3739         int ret;
3740         int error;
3741         int errors = 0;
3742
3743         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3744         BUG_ON(IS_ERR(rec));
3745         rec->found_ref = 1;
3746
3747         /* fixme: this can not detect circular references */
3748         while (loop) {
3749                 loop = 0;
3750                 cache = search_cache_extent(root_cache, 0);
3751                 while (1) {
3752                         if (!cache)
3753                                 break;
3754                         rec = container_of(cache, struct root_record, cache);
3755                         cache = next_cache_extent(cache);
3756
3757                         if (rec->found_ref == 0)
3758                                 continue;
3759
3760                         list_for_each_entry(backref, &rec->backrefs, list) {
3761                                 if (!backref->reachable)
3762                                         continue;
3763
3764                                 ref_root = get_root_rec(root_cache,
3765                                                         backref->ref_root);
3766                                 BUG_ON(IS_ERR(ref_root));
3767                                 if (ref_root->found_ref > 0)
3768                                         continue;
3769
3770                                 backref->reachable = 0;
3771                                 rec->found_ref--;
3772                                 if (rec->found_ref == 0)
3773                                         loop = 1;
3774                         }
3775                 }
3776         }
3777
3778         cache = search_cache_extent(root_cache, 0);
3779         while (1) {
3780                 if (!cache)
3781                         break;
3782                 rec = container_of(cache, struct root_record, cache);
3783                 cache = next_cache_extent(cache);
3784
3785                 if (rec->found_ref == 0 &&
3786                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3787                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3788                         ret = check_orphan_item(root->fs_info->tree_root,
3789                                                 rec->objectid);
3790                         if (ret == 0)
3791                                 continue;
3792
3793                         /*
3794                          * If we don't have a root item then we likely just have
3795                          * a dir item in a snapshot for this root but no actual
3796                          * ref key or anything so it's meaningless.
3797                          */
3798                         if (!rec->found_root_item)
3799                                 continue;
3800                         errors++;
3801                         fprintf(stderr, "fs tree %llu not referenced\n",
3802                                 (unsigned long long)rec->objectid);
3803                 }
3804
3805                 error = 0;
3806                 if (rec->found_ref > 0 && !rec->found_root_item)
3807                         error = 1;
3808                 list_for_each_entry(backref, &rec->backrefs, list) {
3809                         if (!backref->found_dir_item)
3810                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3811                         if (!backref->found_dir_index)
3812                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3813                         if (!backref->found_back_ref)
3814                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3815                         if (!backref->found_forward_ref)
3816                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3817                         if (backref->reachable && backref->errors)
3818                                 error = 1;
3819                 }
3820                 if (!error)
3821                         continue;
3822
3823                 errors++;
3824                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3825                         (unsigned long long)rec->objectid, rec->found_ref,
3826                          rec->found_root_item ? "" : "not found");
3827
3828                 list_for_each_entry(backref, &rec->backrefs, list) {
3829                         if (!backref->reachable)
3830                                 continue;
3831                         if (!backref->errors && rec->found_root_item)
3832                                 continue;
3833                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3834                                 " index %llu namelen %u name %s errors %x\n",
3835                                 (unsigned long long)backref->ref_root,
3836                                 (unsigned long long)backref->dir,
3837                                 (unsigned long long)backref->index,
3838                                 backref->namelen, backref->name,
3839                                 backref->errors);
3840                         print_ref_error(backref->errors);
3841                 }
3842         }
3843         return errors > 0 ? 1 : 0;
3844 }
3845
3846 static int process_root_ref(struct extent_buffer *eb, int slot,
3847                             struct btrfs_key *key,
3848                             struct cache_tree *root_cache)
3849 {
3850         u64 dirid;
3851         u64 index;
3852         u32 len;
3853         u32 name_len;
3854         struct btrfs_root_ref *ref;
3855         char namebuf[BTRFS_NAME_LEN];
3856         int error;
3857
3858         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3859
3860         dirid = btrfs_root_ref_dirid(eb, ref);
3861         index = btrfs_root_ref_sequence(eb, ref);
3862         name_len = btrfs_root_ref_name_len(eb, ref);
3863
3864         if (name_len <= BTRFS_NAME_LEN) {
3865                 len = name_len;
3866                 error = 0;
3867         } else {
3868                 len = BTRFS_NAME_LEN;
3869                 error = REF_ERR_NAME_TOO_LONG;
3870         }
3871         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3872
3873         if (key->type == BTRFS_ROOT_REF_KEY) {
3874                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3875                                  index, namebuf, len, key->type, error);
3876         } else {
3877                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3878                                  index, namebuf, len, key->type, error);
3879         }
3880         return 0;
3881 }
3882
3883 static void free_corrupt_block(struct cache_extent *cache)
3884 {
3885         struct btrfs_corrupt_block *corrupt;
3886
3887         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3888         free(corrupt);
3889 }
3890
3891 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3892
3893 /*
3894  * Repair the btree of the given root.
3895  *
3896  * The fix is to remove the node key in corrupt_blocks cache_tree.
3897  * and rebalance the tree.
3898  * After the fix, the btree should be writeable.
3899  */
3900 static int repair_btree(struct btrfs_root *root,
3901                         struct cache_tree *corrupt_blocks)
3902 {
3903         struct btrfs_trans_handle *trans;
3904         struct btrfs_path path;
3905         struct btrfs_corrupt_block *corrupt;
3906         struct cache_extent *cache;
3907         struct btrfs_key key;
3908         u64 offset;
3909         int level;
3910         int ret = 0;
3911
3912         if (cache_tree_empty(corrupt_blocks))
3913                 return 0;
3914
3915         trans = btrfs_start_transaction(root, 1);
3916         if (IS_ERR(trans)) {
3917                 ret = PTR_ERR(trans);
3918                 fprintf(stderr, "Error starting transaction: %s\n",
3919                         strerror(-ret));
3920                 return ret;
3921         }
3922         btrfs_init_path(&path);
3923         cache = first_cache_extent(corrupt_blocks);
3924         while (cache) {
3925                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3926                                        cache);
3927                 level = corrupt->level;
3928                 path.lowest_level = level;
3929                 key.objectid = corrupt->key.objectid;
3930                 key.type = corrupt->key.type;
3931                 key.offset = corrupt->key.offset;
3932
3933                 /*
3934                  * Here we don't want to do any tree balance, since it may
3935                  * cause a balance with corrupted brother leaf/node,
3936                  * so ins_len set to 0 here.
3937                  * Balance will be done after all corrupt node/leaf is deleted.
3938                  */
3939                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3940                 if (ret < 0)
3941                         goto out;
3942                 offset = btrfs_node_blockptr(path.nodes[level],
3943                                              path.slots[level]);
3944
3945                 /* Remove the ptr */
3946                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3947                 if (ret < 0)
3948                         goto out;
3949                 /*
3950                  * Remove the corresponding extent
3951                  * return value is not concerned.
3952                  */
3953                 btrfs_release_path(&path);
3954                 ret = btrfs_free_extent(trans, root, offset,
3955                                 root->fs_info->nodesize, 0,
3956                                 root->root_key.objectid, level - 1, 0);
3957                 cache = next_cache_extent(cache);
3958         }
3959
3960         /* Balance the btree using btrfs_search_slot() */
3961         cache = first_cache_extent(corrupt_blocks);
3962         while (cache) {
3963                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3964                                        cache);
3965                 memcpy(&key, &corrupt->key, sizeof(key));
3966                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3967                 if (ret < 0)
3968                         goto out;
3969                 /* return will always >0 since it won't find the item */
3970                 ret = 0;
3971                 btrfs_release_path(&path);
3972                 cache = next_cache_extent(cache);
3973         }
3974 out:
3975         btrfs_commit_transaction(trans, root);
3976         btrfs_release_path(&path);
3977         return ret;
3978 }
3979
3980 static int check_fs_root(struct btrfs_root *root,
3981                          struct cache_tree *root_cache,
3982                          struct walk_control *wc)
3983 {
3984         int ret = 0;
3985         int err = 0;
3986         int wret;
3987         int level;
3988         struct btrfs_path path;
3989         struct shared_node root_node;
3990         struct root_record *rec;
3991         struct btrfs_root_item *root_item = &root->root_item;
3992         struct cache_tree corrupt_blocks;
3993         struct orphan_data_extent *orphan;
3994         struct orphan_data_extent *tmp;
3995         enum btrfs_tree_block_status status;
3996         struct node_refs nrefs;
3997
3998         /*
3999          * Reuse the corrupt_block cache tree to record corrupted tree block
4000          *
4001          * Unlike the usage in extent tree check, here we do it in a per
4002          * fs/subvol tree base.
4003          */
4004         cache_tree_init(&corrupt_blocks);
4005         root->fs_info->corrupt_blocks = &corrupt_blocks;
4006
4007         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4008                 rec = get_root_rec(root_cache, root->root_key.objectid);
4009                 BUG_ON(IS_ERR(rec));
4010                 if (btrfs_root_refs(root_item) > 0)
4011                         rec->found_root_item = 1;
4012         }
4013
4014         btrfs_init_path(&path);
4015         memset(&root_node, 0, sizeof(root_node));
4016         cache_tree_init(&root_node.root_cache);
4017         cache_tree_init(&root_node.inode_cache);
4018         memset(&nrefs, 0, sizeof(nrefs));
4019
4020         /* Move the orphan extent record to corresponding inode_record */
4021         list_for_each_entry_safe(orphan, tmp,
4022                                  &root->orphan_data_extents, list) {
4023                 struct inode_record *inode;
4024
4025                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4026                                       1);
4027                 BUG_ON(IS_ERR(inode));
4028                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4029                 list_move(&orphan->list, &inode->orphan_extents);
4030         }
4031
4032         level = btrfs_header_level(root->node);
4033         memset(wc->nodes, 0, sizeof(wc->nodes));
4034         wc->nodes[level] = &root_node;
4035         wc->active_node = level;
4036         wc->root_level = level;
4037
4038         /* We may not have checked the root block, lets do that now */
4039         if (btrfs_is_leaf(root->node))
4040                 status = btrfs_check_leaf(root, NULL, root->node);
4041         else
4042                 status = btrfs_check_node(root, NULL, root->node);
4043         if (status != BTRFS_TREE_BLOCK_CLEAN)
4044                 return -EIO;
4045
4046         if (btrfs_root_refs(root_item) > 0 ||
4047             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4048                 path.nodes[level] = root->node;
4049                 extent_buffer_get(root->node);
4050                 path.slots[level] = 0;
4051         } else {
4052                 struct btrfs_key key;
4053                 struct btrfs_disk_key found_key;
4054
4055                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4056                 level = root_item->drop_level;
4057                 path.lowest_level = level;
4058                 if (level > btrfs_header_level(root->node) ||
4059                     level >= BTRFS_MAX_LEVEL) {
4060                         error("ignoring invalid drop level: %u", level);
4061                         goto skip_walking;
4062                 }
4063                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4064                 if (wret < 0)
4065                         goto skip_walking;
4066                 btrfs_node_key(path.nodes[level], &found_key,
4067                                 path.slots[level]);
4068                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4069                                         sizeof(found_key)));
4070         }
4071
4072         while (1) {
4073                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4074                 if (wret < 0)
4075                         ret = wret;
4076                 if (wret != 0)
4077                         break;
4078
4079                 wret = walk_up_tree(root, &path, wc, &level);
4080                 if (wret < 0)
4081                         ret = wret;
4082                 if (wret != 0)
4083                         break;
4084         }
4085 skip_walking:
4086         btrfs_release_path(&path);
4087
4088         if (!cache_tree_empty(&corrupt_blocks)) {
4089                 struct cache_extent *cache;
4090                 struct btrfs_corrupt_block *corrupt;
4091
4092                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4093                        root->root_key.objectid);
4094                 cache = first_cache_extent(&corrupt_blocks);
4095                 while (cache) {
4096                         corrupt = container_of(cache,
4097                                                struct btrfs_corrupt_block,
4098                                                cache);
4099                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4100                                cache->start, corrupt->level,
4101                                corrupt->key.objectid, corrupt->key.type,
4102                                corrupt->key.offset);
4103                         cache = next_cache_extent(cache);
4104                 }
4105                 if (repair) {
4106                         printf("Try to repair the btree for root %llu\n",
4107                                root->root_key.objectid);
4108                         ret = repair_btree(root, &corrupt_blocks);
4109                         if (ret < 0)
4110                                 fprintf(stderr, "Failed to repair btree: %s\n",
4111                                         strerror(-ret));
4112                         if (!ret)
4113                                 printf("Btree for root %llu is fixed\n",
4114                                        root->root_key.objectid);
4115                 }
4116         }
4117
4118         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4119         if (err < 0)
4120                 ret = err;
4121
4122         if (root_node.current) {
4123                 root_node.current->checked = 1;
4124                 maybe_free_inode_rec(&root_node.inode_cache,
4125                                 root_node.current);
4126         }
4127
4128         err = check_inode_recs(root, &root_node.inode_cache);
4129         if (!ret)
4130                 ret = err;
4131
4132         free_corrupt_blocks_tree(&corrupt_blocks);
4133         root->fs_info->corrupt_blocks = NULL;
4134         free_orphan_data_extents(&root->orphan_data_extents);
4135         return ret;
4136 }
4137
4138 static int fs_root_objectid(u64 objectid)
4139 {
4140         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4141             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4142                 return 1;
4143         return is_fstree(objectid);
4144 }
4145
4146 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4147                           struct cache_tree *root_cache)
4148 {
4149         struct btrfs_path path;
4150         struct btrfs_key key;
4151         struct walk_control wc;
4152         struct extent_buffer *leaf, *tree_node;
4153         struct btrfs_root *tmp_root;
4154         struct btrfs_root *tree_root = fs_info->tree_root;
4155         int ret;
4156         int err = 0;
4157
4158         if (ctx.progress_enabled) {
4159                 ctx.tp = TASK_FS_ROOTS;
4160                 task_start(ctx.info);
4161         }
4162
4163         /*
4164          * Just in case we made any changes to the extent tree that weren't
4165          * reflected into the free space cache yet.
4166          */
4167         if (repair)
4168                 reset_cached_block_groups(fs_info);
4169         memset(&wc, 0, sizeof(wc));
4170         cache_tree_init(&wc.shared);
4171         btrfs_init_path(&path);
4172
4173 again:
4174         key.offset = 0;
4175         key.objectid = 0;
4176         key.type = BTRFS_ROOT_ITEM_KEY;
4177         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4178         if (ret < 0) {
4179                 err = 1;
4180                 goto out;
4181         }
4182         tree_node = tree_root->node;
4183         while (1) {
4184                 if (tree_node != tree_root->node) {
4185                         free_root_recs_tree(root_cache);
4186                         btrfs_release_path(&path);
4187                         goto again;
4188                 }
4189                 leaf = path.nodes[0];
4190                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4191                         ret = btrfs_next_leaf(tree_root, &path);
4192                         if (ret) {
4193                                 if (ret < 0)
4194                                         err = 1;
4195                                 break;
4196                         }
4197                         leaf = path.nodes[0];
4198                 }
4199                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4200                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4201                     fs_root_objectid(key.objectid)) {
4202                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4203                                 tmp_root = btrfs_read_fs_root_no_cache(
4204                                                 fs_info, &key);
4205                         } else {
4206                                 key.offset = (u64)-1;
4207                                 tmp_root = btrfs_read_fs_root(
4208                                                 fs_info, &key);
4209                         }
4210                         if (IS_ERR(tmp_root)) {
4211                                 err = 1;
4212                                 goto next;
4213                         }
4214                         ret = check_fs_root(tmp_root, root_cache, &wc);
4215                         if (ret == -EAGAIN) {
4216                                 free_root_recs_tree(root_cache);
4217                                 btrfs_release_path(&path);
4218                                 goto again;
4219                         }
4220                         if (ret)
4221                                 err = 1;
4222                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4223                                 btrfs_free_fs_root(tmp_root);
4224                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4225                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4226                         process_root_ref(leaf, path.slots[0], &key,
4227                                          root_cache);
4228                 }
4229 next:
4230                 path.slots[0]++;
4231         }
4232 out:
4233         btrfs_release_path(&path);
4234         if (err)
4235                 free_extent_cache_tree(&wc.shared);
4236         if (!cache_tree_empty(&wc.shared))
4237                 fprintf(stderr, "warning line %d\n", __LINE__);
4238
4239         task_stop(ctx.info);
4240
4241         return err;
4242 }
4243
4244 /*
4245  * Find the @index according by @ino and name.
4246  * Notice:time efficiency is O(N)
4247  *
4248  * @root:       the root of the fs/file tree
4249  * @index_ret:  the index as return value
4250  * @namebuf:    the name to match
4251  * @name_len:   the length of name to match
4252  * @file_type:  the file_type of INODE_ITEM to match
4253  *
4254  * Returns 0 if found and *@index_ret will be modified with right value
4255  * Returns< 0 not found and *@index_ret will be (u64)-1
4256  */
4257 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4258                           u64 *index_ret, char *namebuf, u32 name_len,
4259                           u8 file_type)
4260 {
4261         struct btrfs_path path;
4262         struct extent_buffer *node;
4263         struct btrfs_dir_item *di;
4264         struct btrfs_key key;
4265         struct btrfs_key location;
4266         char name[BTRFS_NAME_LEN] = {0};
4267
4268         u32 total;
4269         u32 cur = 0;
4270         u32 len;
4271         u32 data_len;
4272         u8 filetype;
4273         int slot;
4274         int ret;
4275
4276         ASSERT(index_ret);
4277
4278         /* search from the last index */
4279         key.objectid = dirid;
4280         key.offset = (u64)-1;
4281         key.type = BTRFS_DIR_INDEX_KEY;
4282
4283         btrfs_init_path(&path);
4284         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4285         if (ret < 0)
4286                 return ret;
4287
4288 loop:
4289         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4290         if (ret) {
4291                 ret = -ENOENT;
4292                 *index_ret = (64)-1;
4293                 goto out;
4294         }
4295         /* Check whether inode_id/filetype/name match */
4296         node = path.nodes[0];
4297         slot = path.slots[0];
4298         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4299         total = btrfs_item_size_nr(node, slot);
4300         while (cur < total) {
4301                 ret = -ENOENT;
4302                 len = btrfs_dir_name_len(node, di);
4303                 data_len = btrfs_dir_data_len(node, di);
4304
4305                 btrfs_dir_item_key_to_cpu(node, di, &location);
4306                 if (location.objectid != location_id ||
4307                     location.type != BTRFS_INODE_ITEM_KEY ||
4308                     location.offset != 0)
4309                         goto next;
4310
4311                 filetype = btrfs_dir_type(node, di);
4312                 if (file_type != filetype)
4313                         goto next;
4314
4315                 if (len > BTRFS_NAME_LEN)
4316                         len = BTRFS_NAME_LEN;
4317
4318                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4319                 if (len != name_len || strncmp(namebuf, name, len))
4320                         goto next;
4321
4322                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4323                 *index_ret = key.offset;
4324                 ret = 0;
4325                 goto out;
4326 next:
4327                 len += sizeof(*di) + data_len;
4328                 di = (struct btrfs_dir_item *)((char *)di + len);
4329                 cur += len;
4330         }
4331         goto loop;
4332
4333 out:
4334         btrfs_release_path(&path);
4335         return ret;
4336 }
4337
4338 /*
4339  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4340  * INODE_REF/INODE_EXTREF match.
4341  *
4342  * @root:       the root of the fs/file tree
4343  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4344  *              value while find index
4345  * @location_key: location key of the struct btrfs_dir_item to match
4346  * @name:       the name to match
4347  * @namelen:    the length of name
4348  * @file_type:  the type of file to math
4349  *
4350  * Return 0 if no error occurred.
4351  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4352  * DIR_ITEM/DIR_INDEX
4353  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4354  * and DIR_ITEM/DIR_INDEX mismatch
4355  */
4356 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4357                          struct btrfs_key *location_key, char *name,
4358                          u32 namelen, u8 file_type)
4359 {
4360         struct btrfs_path path;
4361         struct extent_buffer *node;
4362         struct btrfs_dir_item *di;
4363         struct btrfs_key location;
4364         char namebuf[BTRFS_NAME_LEN] = {0};
4365         u32 total;
4366         u32 cur = 0;
4367         u32 len;
4368         u32 data_len;
4369         u8 filetype;
4370         int slot;
4371         int ret;
4372
4373         /* get the index by traversing all index */
4374         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4375                 ret = find_dir_index(root, key->objectid,
4376                                      location_key->objectid, &key->offset,
4377                                      name, namelen, file_type);
4378                 if (ret)
4379                         ret = DIR_INDEX_MISSING;
4380                 return ret;
4381         }
4382
4383         btrfs_init_path(&path);
4384         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4385         if (ret) {
4386                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4387                         DIR_INDEX_MISSING;
4388                 goto out;
4389         }
4390
4391         /* Check whether inode_id/filetype/name match */
4392         node = path.nodes[0];
4393         slot = path.slots[0];
4394         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4395         total = btrfs_item_size_nr(node, slot);
4396         while (cur < total) {
4397                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4398                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4399
4400                 len = btrfs_dir_name_len(node, di);
4401                 data_len = btrfs_dir_data_len(node, di);
4402
4403                 btrfs_dir_item_key_to_cpu(node, di, &location);
4404                 if (location.objectid != location_key->objectid ||
4405                     location.type != location_key->type ||
4406                     location.offset != location_key->offset)
4407                         goto next;
4408
4409                 filetype = btrfs_dir_type(node, di);
4410                 if (file_type != filetype)
4411                         goto next;
4412
4413                 if (len > BTRFS_NAME_LEN) {
4414                         len = BTRFS_NAME_LEN;
4415                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4416                         root->objectid,
4417                         key->type == BTRFS_DIR_ITEM_KEY ?
4418                         "DIR_ITEM" : "DIR_INDEX",
4419                         key->objectid, key->offset, len);
4420                 }
4421                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4422                                    len);
4423                 if (len != namelen || strncmp(namebuf, name, len))
4424                         goto next;
4425
4426                 ret = 0;
4427                 goto out;
4428 next:
4429                 len += sizeof(*di) + data_len;
4430                 di = (struct btrfs_dir_item *)((char *)di + len);
4431                 cur += len;
4432         }
4433
4434 out:
4435         btrfs_release_path(&path);
4436         return ret;
4437 }
4438
4439 /*
4440  * Prints inode ref error message
4441  */
4442 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4443                                 u64 index, const char *namebuf, int name_len,
4444                                 u8 filetype, int err)
4445 {
4446         if (!err)
4447                 return;
4448
4449         /* root dir error */
4450         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4451                 error(
4452         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4453                       root->objectid, key->objectid, key->offset, namebuf);
4454                 return;
4455         }
4456
4457         /* normal error */
4458         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4459                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4460                       root->objectid, key->offset,
4461                       btrfs_name_hash(namebuf, name_len),
4462                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4463                       namebuf, filetype);
4464         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4465                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4466                       root->objectid, key->offset, index,
4467                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4468                       namebuf, filetype);
4469 }
4470
4471 /*
4472  * Traverse the given INODE_REF and call find_dir_item() to find related
4473  * DIR_ITEM/DIR_INDEX.
4474  *
4475  * @root:       the root of the fs/file tree
4476  * @ref_key:    the key of the INODE_REF
4477  * @refs:       the count of INODE_REF
4478  * @mode:       the st_mode of INODE_ITEM
4479  *
4480  * Return 0 if no error occurred.
4481  */
4482 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4483                            struct btrfs_path *path, char *name_ret,
4484                            u32 *namelen_ret, u64 *refs, int mode)
4485 {
4486         struct btrfs_key key;
4487         struct btrfs_key location;
4488         struct btrfs_inode_ref *ref;
4489         struct extent_buffer *node;
4490         char namebuf[BTRFS_NAME_LEN] = {0};
4491         u32 total;
4492         u32 cur = 0;
4493         u32 len;
4494         u32 name_len;
4495         u64 index;
4496         int err = 0;
4497         int tmp_err;
4498         int slot;
4499
4500         location.objectid = ref_key->objectid;
4501         location.type = BTRFS_INODE_ITEM_KEY;
4502         location.offset = 0;
4503         node = path->nodes[0];
4504         slot = path->slots[0];
4505
4506         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4507         total = btrfs_item_size_nr(node, slot);
4508
4509 next:
4510         /* Update inode ref count */
4511         (*refs)++;
4512
4513         tmp_err = 0;
4514         index = btrfs_inode_ref_index(node, ref);
4515         name_len = btrfs_inode_ref_name_len(node, ref);
4516         if (cur + sizeof(*ref) + name_len > total ||
4517             name_len > BTRFS_NAME_LEN) {
4518                 warning("root %llu INODE_REF[%llu %llu] name too long",
4519                         root->objectid, ref_key->objectid, ref_key->offset);
4520
4521                 if (total < cur + sizeof(*ref))
4522                         goto out;
4523                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4524         } else {
4525                 len = name_len;
4526         }
4527
4528         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4529
4530         /* copy the fisrt name found to name_ret */
4531         if (*refs == 1 && name_ret) {
4532                 memcpy(name_ret, namebuf, len);
4533                 *namelen_ret = len;
4534         }
4535
4536         /* Check root dir ref */
4537         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4538                 if (index != 0 || len != strlen("..") ||
4539                     strncmp("..", namebuf, len) ||
4540                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4541                         /* set err bits then repair will delete the ref */
4542                         err |= DIR_INDEX_MISSING;
4543                         err |= DIR_ITEM_MISSING;
4544                 }
4545                 goto end;
4546         }
4547
4548         /* Find related DIR_INDEX */
4549         key.objectid = ref_key->offset;
4550         key.type = BTRFS_DIR_INDEX_KEY;
4551         key.offset = index;
4552         tmp_err |= find_dir_item(root, &key, &location, namebuf, len, mode);
4553
4554         /* Find related dir_item */
4555         key.objectid = ref_key->offset;
4556         key.type = BTRFS_DIR_ITEM_KEY;
4557         key.offset = btrfs_name_hash(namebuf, len);
4558         tmp_err |= find_dir_item(root, &key, &location, namebuf, len, mode);
4559
4560 end:
4561         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4562                             imode_to_type(mode), tmp_err);
4563         err |= tmp_err;
4564         len = sizeof(*ref) + name_len;
4565         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4566         cur += len;
4567         if (cur < total)
4568                 goto next;
4569
4570 out:
4571         return err;
4572 }
4573
4574 /*
4575  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4576  * DIR_ITEM/DIR_INDEX.
4577  *
4578  * @root:       the root of the fs/file tree
4579  * @ref_key:    the key of the INODE_EXTREF
4580  * @refs:       the count of INODE_EXTREF
4581  * @mode:       the st_mode of INODE_ITEM
4582  *
4583  * Return 0 if no error occurred.
4584  */
4585 static int check_inode_extref(struct btrfs_root *root,
4586                               struct btrfs_key *ref_key,
4587                               struct extent_buffer *node, int slot, u64 *refs,
4588                               int mode)
4589 {
4590         struct btrfs_key key;
4591         struct btrfs_key location;
4592         struct btrfs_inode_extref *extref;
4593         char namebuf[BTRFS_NAME_LEN] = {0};
4594         u32 total;
4595         u32 cur = 0;
4596         u32 len;
4597         u32 name_len;
4598         u64 index;
4599         u64 parent;
4600         int ret;
4601         int err = 0;
4602
4603         location.objectid = ref_key->objectid;
4604         location.type = BTRFS_INODE_ITEM_KEY;
4605         location.offset = 0;
4606
4607         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4608         total = btrfs_item_size_nr(node, slot);
4609
4610 next:
4611         /* update inode ref count */
4612         (*refs)++;
4613         name_len = btrfs_inode_extref_name_len(node, extref);
4614         index = btrfs_inode_extref_index(node, extref);
4615         parent = btrfs_inode_extref_parent(node, extref);
4616         if (name_len <= BTRFS_NAME_LEN) {
4617                 len = name_len;
4618         } else {
4619                 len = BTRFS_NAME_LEN;
4620                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4621                         root->objectid, ref_key->objectid, ref_key->offset);
4622         }
4623         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4624
4625         /* Check root dir ref name */
4626         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4627                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4628                       root->objectid, ref_key->objectid, ref_key->offset,
4629                       namebuf);
4630                 err |= ROOT_DIR_ERROR;
4631         }
4632
4633         /* find related dir_index */
4634         key.objectid = parent;
4635         key.type = BTRFS_DIR_INDEX_KEY;
4636         key.offset = index;
4637         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4638         err |= ret;
4639
4640         /* find related dir_item */
4641         key.objectid = parent;
4642         key.type = BTRFS_DIR_ITEM_KEY;
4643         key.offset = btrfs_name_hash(namebuf, len);
4644         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4645         err |= ret;
4646
4647         len = sizeof(*extref) + name_len;
4648         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4649         cur += len;
4650
4651         if (cur < total)
4652                 goto next;
4653
4654         return err;
4655 }
4656
4657 /*
4658  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4659  * DIR_ITEM/DIR_INDEX match.
4660  * Return with @index_ret.
4661  *
4662  * @root:       the root of the fs/file tree
4663  * @key:        the key of the INODE_REF/INODE_EXTREF
4664  * @name:       the name in the INODE_REF/INODE_EXTREF
4665  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4666  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4667  *              value (64)-1 means do not check index
4668  * @ext_ref:    the EXTENDED_IREF feature
4669  *
4670  * Return 0 if no error occurred.
4671  * Return >0 for error bitmap
4672  */
4673 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4674                           char *name, int namelen, u64 *index_ret,
4675                           unsigned int ext_ref)
4676 {
4677         struct btrfs_path path;
4678         struct btrfs_inode_ref *ref;
4679         struct btrfs_inode_extref *extref;
4680         struct extent_buffer *node;
4681         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4682         u32 total;
4683         u32 cur = 0;
4684         u32 len;
4685         u32 ref_namelen;
4686         u64 ref_index;
4687         u64 parent;
4688         u64 dir_id;
4689         int slot;
4690         int ret;
4691
4692         ASSERT(index_ret);
4693
4694         btrfs_init_path(&path);
4695         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4696         if (ret) {
4697                 ret = INODE_REF_MISSING;
4698                 goto extref;
4699         }
4700
4701         node = path.nodes[0];
4702         slot = path.slots[0];
4703
4704         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4705         total = btrfs_item_size_nr(node, slot);
4706
4707         /* Iterate all entry of INODE_REF */
4708         while (cur < total) {
4709                 ret = INODE_REF_MISSING;
4710
4711                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4712                 ref_index = btrfs_inode_ref_index(node, ref);
4713                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4714                         goto next_ref;
4715
4716                 if (cur + sizeof(*ref) + ref_namelen > total ||
4717                     ref_namelen > BTRFS_NAME_LEN) {
4718                         warning("root %llu INODE %s[%llu %llu] name too long",
4719                                 root->objectid,
4720                                 key->type == BTRFS_INODE_REF_KEY ?
4721                                         "REF" : "EXTREF",
4722                                 key->objectid, key->offset);
4723
4724                         if (cur + sizeof(*ref) > total)
4725                                 break;
4726                         len = min_t(u32, total - cur - sizeof(*ref),
4727                                     BTRFS_NAME_LEN);
4728                 } else {
4729                         len = ref_namelen;
4730                 }
4731
4732                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4733                                    len);
4734
4735                 if (len != namelen || strncmp(ref_namebuf, name, len))
4736                         goto next_ref;
4737
4738                 *index_ret = ref_index;
4739                 ret = 0;
4740                 goto out;
4741 next_ref:
4742                 len = sizeof(*ref) + ref_namelen;
4743                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4744                 cur += len;
4745         }
4746
4747 extref:
4748         /* Skip if not support EXTENDED_IREF feature */
4749         if (!ext_ref)
4750                 goto out;
4751
4752         btrfs_release_path(&path);
4753         btrfs_init_path(&path);
4754
4755         dir_id = key->offset;
4756         key->type = BTRFS_INODE_EXTREF_KEY;
4757         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4758
4759         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4760         if (ret) {
4761                 ret = INODE_REF_MISSING;
4762                 goto out;
4763         }
4764
4765         node = path.nodes[0];
4766         slot = path.slots[0];
4767
4768         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4769         cur = 0;
4770         total = btrfs_item_size_nr(node, slot);
4771
4772         /* Iterate all entry of INODE_EXTREF */
4773         while (cur < total) {
4774                 ret = INODE_REF_MISSING;
4775
4776                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4777                 ref_index = btrfs_inode_extref_index(node, extref);
4778                 parent = btrfs_inode_extref_parent(node, extref);
4779                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4780                         goto next_extref;
4781
4782                 if (parent != dir_id)
4783                         goto next_extref;
4784
4785                 if (ref_namelen <= BTRFS_NAME_LEN) {
4786                         len = ref_namelen;
4787                 } else {
4788                         len = BTRFS_NAME_LEN;
4789                         warning("root %llu INODE %s[%llu %llu] name too long",
4790                                 root->objectid,
4791                                 key->type == BTRFS_INODE_REF_KEY ?
4792                                         "REF" : "EXTREF",
4793                                 key->objectid, key->offset);
4794                 }
4795                 read_extent_buffer(node, ref_namebuf,
4796                                    (unsigned long)(extref + 1), len);
4797
4798                 if (len != namelen || strncmp(ref_namebuf, name, len))
4799                         goto next_extref;
4800
4801                 *index_ret = ref_index;
4802                 ret = 0;
4803                 goto out;
4804
4805 next_extref:
4806                 len = sizeof(*extref) + ref_namelen;
4807                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4808                 cur += len;
4809
4810         }
4811 out:
4812         btrfs_release_path(&path);
4813         return ret;
4814 }
4815
4816 /*
4817  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4818  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4819  *
4820  * @root:       the root of the fs/file tree
4821  * @key:        the key of the INODE_REF/INODE_EXTREF
4822  * @size:       the st_size of the INODE_ITEM
4823  * @ext_ref:    the EXTENDED_IREF feature
4824  *
4825  * Return 0 if no error occurred.
4826  */
4827 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4828                           struct extent_buffer *node, int slot, u64 *size,
4829                           unsigned int ext_ref)
4830 {
4831         struct btrfs_dir_item *di;
4832         struct btrfs_inode_item *ii;
4833         struct btrfs_path path;
4834         struct btrfs_key location;
4835         char namebuf[BTRFS_NAME_LEN] = {0};
4836         u32 total;
4837         u32 cur = 0;
4838         u32 len;
4839         u32 name_len;
4840         u32 data_len;
4841         u8 filetype;
4842         u32 mode;
4843         u64 index;
4844         int ret;
4845         int err = 0;
4846
4847         /*
4848          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4849          * ignore index check.
4850          */
4851         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4852
4853         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4854         total = btrfs_item_size_nr(node, slot);
4855
4856         while (cur < total) {
4857                 data_len = btrfs_dir_data_len(node, di);
4858                 if (data_len)
4859                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4860                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4861                               "DIR_ITEM" : "DIR_INDEX",
4862                               key->objectid, key->offset, data_len);
4863
4864                 name_len = btrfs_dir_name_len(node, di);
4865                 if (cur + sizeof(*di) + name_len > total ||
4866                     name_len > BTRFS_NAME_LEN) {
4867                         warning("root %llu %s[%llu %llu] name too long",
4868                                 root->objectid,
4869                                 key->type == BTRFS_DIR_ITEM_KEY ?
4870                                 "DIR_ITEM" : "DIR_INDEX",
4871                                 key->objectid, key->offset);
4872
4873                         if (cur + sizeof(*di) > total)
4874                                 break;
4875                         len = min_t(u32, total - cur - sizeof(*di),
4876                                     BTRFS_NAME_LEN);
4877                 } else {
4878                         len = name_len;
4879                 }
4880                 (*size) += name_len;
4881
4882                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4883                 filetype = btrfs_dir_type(node, di);
4884
4885                 if (key->type == BTRFS_DIR_ITEM_KEY &&
4886                     key->offset != btrfs_name_hash(namebuf, len)) {
4887                         err |= -EIO;
4888                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4889                                 root->objectid, key->objectid, key->offset,
4890                                 namebuf, len, filetype, key->offset,
4891                                 btrfs_name_hash(namebuf, len));
4892                 }
4893
4894                 btrfs_init_path(&path);
4895                 btrfs_dir_item_key_to_cpu(node, di, &location);
4896
4897                 /* Ignore related ROOT_ITEM check */
4898                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4899                         goto next;
4900
4901                 /* Check relative INODE_ITEM(existence/filetype) */
4902                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4903                 if (ret) {
4904                         err |= INODE_ITEM_MISSING;
4905                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4906                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4907                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4908                               key->offset, location.objectid, name_len,
4909                               namebuf, filetype);
4910                         goto next;
4911                 }
4912
4913                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4914                                     struct btrfs_inode_item);
4915                 mode = btrfs_inode_mode(path.nodes[0], ii);
4916
4917                 if (imode_to_type(mode) != filetype) {
4918                         err |= INODE_ITEM_MISMATCH;
4919                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4920                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4921                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4922                               key->offset, name_len, namebuf, filetype);
4923                 }
4924
4925                 /* Check relative INODE_REF/INODE_EXTREF */
4926                 location.type = BTRFS_INODE_REF_KEY;
4927                 location.offset = key->objectid;
4928                 ret = find_inode_ref(root, &location, namebuf, len,
4929                                      &index, ext_ref);
4930                 err |= ret;
4931                 if (ret & INODE_REF_MISSING)
4932                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4933                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4934                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4935                               key->offset, name_len, namebuf, filetype);
4936
4937 next:
4938                 btrfs_release_path(&path);
4939                 len = sizeof(*di) + name_len + data_len;
4940                 di = (struct btrfs_dir_item *)((char *)di + len);
4941                 cur += len;
4942
4943                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4944                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4945                               root->objectid, key->objectid, key->offset);
4946                         break;
4947                 }
4948         }
4949
4950         return err;
4951 }
4952
4953 /*
4954  * Check file extent datasum/hole, update the size of the file extents,
4955  * check and update the last offset of the file extent.
4956  *
4957  * @root:       the root of fs/file tree.
4958  * @fkey:       the key of the file extent.
4959  * @nodatasum:  INODE_NODATASUM feature.
4960  * @size:       the sum of all EXTENT_DATA items size for this inode.
4961  * @end:        the offset of the last extent.
4962  *
4963  * Return 0 if no error occurred.
4964  */
4965 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4966                              struct extent_buffer *node, int slot,
4967                              unsigned int nodatasum, u64 *size, u64 *end)
4968 {
4969         struct btrfs_file_extent_item *fi;
4970         u64 disk_bytenr;
4971         u64 disk_num_bytes;
4972         u64 extent_num_bytes;
4973         u64 extent_offset;
4974         u64 csum_found;         /* In byte size, sectorsize aligned */
4975         u64 search_start;       /* Logical range start we search for csum */
4976         u64 search_len;         /* Logical range len we search for csum */
4977         unsigned int extent_type;
4978         unsigned int is_hole;
4979         int compressed = 0;
4980         int ret;
4981         int err = 0;
4982
4983         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4984
4985         /* Check inline extent */
4986         extent_type = btrfs_file_extent_type(node, fi);
4987         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4988                 struct btrfs_item *e = btrfs_item_nr(slot);
4989                 u32 item_inline_len;
4990
4991                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4992                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4993                 compressed = btrfs_file_extent_compression(node, fi);
4994                 if (extent_num_bytes == 0) {
4995                         error(
4996                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4997                                 root->objectid, fkey->objectid, fkey->offset);
4998                         err |= FILE_EXTENT_ERROR;
4999                 }
5000                 if (!compressed && extent_num_bytes != item_inline_len) {
5001                         error(
5002                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5003                                 root->objectid, fkey->objectid, fkey->offset,
5004                                 extent_num_bytes, item_inline_len);
5005                         err |= FILE_EXTENT_ERROR;
5006                 }
5007                 *end += extent_num_bytes;
5008                 *size += extent_num_bytes;
5009                 return err;
5010         }
5011
5012         /* Check extent type */
5013         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5014                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5015                 err |= FILE_EXTENT_ERROR;
5016                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5017                       root->objectid, fkey->objectid, fkey->offset);
5018                 return err;
5019         }
5020
5021         /* Check REG_EXTENT/PREALLOC_EXTENT */
5022         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5023         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5024         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5025         extent_offset = btrfs_file_extent_offset(node, fi);
5026         compressed = btrfs_file_extent_compression(node, fi);
5027         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5028
5029         /*
5030          * Check EXTENT_DATA csum
5031          *
5032          * For plain (uncompressed) extent, we should only check the range
5033          * we're referring to, as it's possible that part of prealloc extent
5034          * has been written, and has csum:
5035          *
5036          * |<--- Original large preallocated extent A ---->|
5037          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5038          *      No csum                         Has csum
5039          *
5040          * For compressed extent, we should check the whole range.
5041          */
5042         if (!compressed) {
5043                 search_start = disk_bytenr + extent_offset;
5044                 search_len = extent_num_bytes;
5045         } else {
5046                 search_start = disk_bytenr;
5047                 search_len = disk_num_bytes;
5048         }
5049         ret = count_csum_range(root, search_start, search_len, &csum_found);
5050         if (csum_found > 0 && nodatasum) {
5051                 err |= ODD_CSUM_ITEM;
5052                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5053                       root->objectid, fkey->objectid, fkey->offset);
5054         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5055                    !is_hole && (ret < 0 || csum_found < search_len)) {
5056                 err |= CSUM_ITEM_MISSING;
5057                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5058                       root->objectid, fkey->objectid, fkey->offset,
5059                       csum_found, search_len);
5060         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5061                 err |= ODD_CSUM_ITEM;
5062                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5063                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5064         }
5065
5066         /* Check EXTENT_DATA hole */
5067         if (!no_holes && *end != fkey->offset) {
5068                 err |= FILE_EXTENT_ERROR;
5069                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5070                       root->objectid, fkey->objectid, fkey->offset);
5071         }
5072
5073         *end += extent_num_bytes;
5074         if (!is_hole)
5075                 *size += extent_num_bytes;
5076
5077         return err;
5078 }
5079
5080 /*
5081  * Set inode item nbytes to @nbytes
5082  *
5083  * Returns  0     on success
5084  * Returns  != 0  on error
5085  */
5086 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5087                                       struct btrfs_path *path,
5088                                       u64 ino, u64 nbytes)
5089 {
5090         struct btrfs_trans_handle *trans;
5091         struct btrfs_inode_item *ii;
5092         struct btrfs_key key;
5093         struct btrfs_key research_key;
5094         int err = 0;
5095         int ret;
5096
5097         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5098
5099         key.objectid = ino;
5100         key.type = BTRFS_INODE_ITEM_KEY;
5101         key.offset = 0;
5102
5103         trans = btrfs_start_transaction(root, 1);
5104         if (IS_ERR(trans)) {
5105                 ret = PTR_ERR(trans);
5106                 err |= ret;
5107                 goto out;
5108         }
5109
5110         btrfs_release_path(path);
5111         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5112         if (ret > 0)
5113                 ret = -ENOENT;
5114         if (ret) {
5115                 err |= ret;
5116                 goto fail;
5117         }
5118
5119         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5120                             struct btrfs_inode_item);
5121         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5122         btrfs_mark_buffer_dirty(path->nodes[0]);
5123 fail:
5124         btrfs_commit_transaction(trans, root);
5125 out:
5126         if (ret)
5127                 error("failed to set nbytes in inode %llu root %llu",
5128                       ino, root->root_key.objectid);
5129         else
5130                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5131                        root->root_key.objectid, nbytes);
5132
5133         /* research path */
5134         btrfs_release_path(path);
5135         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5136         err |= ret;
5137
5138         return err;
5139 }
5140
5141 /*
5142  * Set directory inode isize to @isize.
5143  *
5144  * Returns 0     on success.
5145  * Returns != 0  on error.
5146  */
5147 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5148                                    struct btrfs_path *path,
5149                                    u64 ino, u64 isize)
5150 {
5151         struct btrfs_trans_handle *trans;
5152         struct btrfs_inode_item *ii;
5153         struct btrfs_key key;
5154         struct btrfs_key research_key;
5155         int ret;
5156         int err = 0;
5157
5158         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5159
5160         key.objectid = ino;
5161         key.type = BTRFS_INODE_ITEM_KEY;
5162         key.offset = 0;
5163
5164         trans = btrfs_start_transaction(root, 1);
5165         if (IS_ERR(trans)) {
5166                 ret = PTR_ERR(trans);
5167                 err |= ret;
5168                 goto out;
5169         }
5170
5171         btrfs_release_path(path);
5172         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5173         if (ret > 0)
5174                 ret = -ENOENT;
5175         if (ret) {
5176                 err |= ret;
5177                 goto fail;
5178         }
5179
5180         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5181                             struct btrfs_inode_item);
5182         btrfs_set_inode_size(path->nodes[0], ii, isize);
5183         btrfs_mark_buffer_dirty(path->nodes[0]);
5184 fail:
5185         btrfs_commit_transaction(trans, root);
5186 out:
5187         if (ret)
5188                 error("failed to set isize in inode %llu root %llu",
5189                       ino, root->root_key.objectid);
5190         else
5191                 printf("Set isize in inode %llu root %llu to %llu\n",
5192                        ino, root->root_key.objectid, isize);
5193
5194         btrfs_release_path(path);
5195         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5196         err |= ret;
5197
5198         return err;
5199 }
5200
5201 /*
5202  * Wrapper function for btrfs_add_orphan_item().
5203  *
5204  * Returns 0     on success.
5205  * Returns != 0  on error.
5206  */
5207 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5208                                            struct btrfs_path *path, u64 ino)
5209 {
5210         struct btrfs_trans_handle *trans;
5211         struct btrfs_key research_key;
5212         int ret;
5213         int err = 0;
5214
5215         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5216
5217         trans = btrfs_start_transaction(root, 1);
5218         if (IS_ERR(trans)) {
5219                 ret = PTR_ERR(trans);
5220                 err |= ret;
5221                 goto out;
5222         }
5223
5224         btrfs_release_path(path);
5225         ret = btrfs_add_orphan_item(trans, root, path, ino);
5226         err |= ret;
5227         btrfs_commit_transaction(trans, root);
5228 out:
5229         if (ret)
5230                 error("failed to add inode %llu as orphan item root %llu",
5231                       ino, root->root_key.objectid);
5232         else
5233                 printf("Added inode %llu as orphan item root %llu\n",
5234                        ino, root->root_key.objectid);
5235
5236         btrfs_release_path(path);
5237         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5238         err |= ret;
5239
5240         return err;
5241 }
5242
5243 /*
5244  * Check INODE_ITEM and related ITEMs (the same inode number)
5245  * 1. check link count
5246  * 2. check inode ref/extref
5247  * 3. check dir item/index
5248  *
5249  * @ext_ref:    the EXTENDED_IREF feature
5250  *
5251  * Return 0 if no error occurred.
5252  * Return >0 for error or hit the traversal is done(by error bitmap)
5253  */
5254 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5255                             unsigned int ext_ref)
5256 {
5257         struct extent_buffer *node;
5258         struct btrfs_inode_item *ii;
5259         struct btrfs_key key;
5260         u64 inode_id;
5261         u32 mode;
5262         u64 nlink;
5263         u64 nbytes;
5264         u64 isize;
5265         u64 size = 0;
5266         u64 refs = 0;
5267         u64 extent_end = 0;
5268         u64 extent_size = 0;
5269         unsigned int dir;
5270         unsigned int nodatasum;
5271         int slot;
5272         int ret;
5273         int err = 0;
5274         char namebuf[BTRFS_NAME_LEN] = {0};
5275         u32 name_len = 0;
5276
5277         node = path->nodes[0];
5278         slot = path->slots[0];
5279
5280         btrfs_item_key_to_cpu(node, &key, slot);
5281         inode_id = key.objectid;
5282
5283         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5284                 ret = btrfs_next_item(root, path);
5285                 if (ret > 0)
5286                         err |= LAST_ITEM;
5287                 return err;
5288         }
5289
5290         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5291         isize = btrfs_inode_size(node, ii);
5292         nbytes = btrfs_inode_nbytes(node, ii);
5293         mode = btrfs_inode_mode(node, ii);
5294         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5295         nlink = btrfs_inode_nlink(node, ii);
5296         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5297
5298         while (1) {
5299                 ret = btrfs_next_item(root, path);
5300                 if (ret < 0) {
5301                         /* out will fill 'err' rusing current statistics */
5302                         goto out;
5303                 } else if (ret > 0) {
5304                         err |= LAST_ITEM;
5305                         goto out;
5306                 }
5307
5308                 node = path->nodes[0];
5309                 slot = path->slots[0];
5310                 btrfs_item_key_to_cpu(node, &key, slot);
5311                 if (key.objectid != inode_id)
5312                         goto out;
5313
5314                 switch (key.type) {
5315                 case BTRFS_INODE_REF_KEY:
5316                         ret = check_inode_ref(root, &key, path, namebuf,
5317                                               &name_len, &refs, mode);
5318                         err |= ret;
5319                         break;
5320                 case BTRFS_INODE_EXTREF_KEY:
5321                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5322                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5323                                         root->objectid, key.objectid,
5324                                         key.offset);
5325                         ret = check_inode_extref(root, &key, node, slot, &refs,
5326                                                  mode);
5327                         err |= ret;
5328                         break;
5329                 case BTRFS_DIR_ITEM_KEY:
5330                 case BTRFS_DIR_INDEX_KEY:
5331                         if (!dir) {
5332                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5333                                         root->objectid, inode_id,
5334                                         imode_to_type(mode), key.objectid,
5335                                         key.offset);
5336                         }
5337                         ret = check_dir_item(root, &key, node, slot, &size,
5338                                              ext_ref);
5339                         err |= ret;
5340                         break;
5341                 case BTRFS_EXTENT_DATA_KEY:
5342                         if (dir) {
5343                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5344                                         root->objectid, inode_id, key.objectid,
5345                                         key.offset);
5346                         }
5347                         ret = check_file_extent(root, &key, node, slot,
5348                                                 nodatasum, &extent_size,
5349                                                 &extent_end);
5350                         err |= ret;
5351                         break;
5352                 case BTRFS_XATTR_ITEM_KEY:
5353                         break;
5354                 default:
5355                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5356                               key.objectid, key.type, key.offset);
5357                 }
5358         }
5359
5360 out:
5361         /* verify INODE_ITEM nlink/isize/nbytes */
5362         if (dir) {
5363                 if (nlink != 1) {
5364                         err |= LINK_COUNT_ERROR;
5365                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5366                               root->objectid, inode_id, nlink);
5367                 }
5368
5369                 /*
5370                  * Just a warning, as dir inode nbytes is just an
5371                  * instructive value.
5372                  */
5373                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5374                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5375                                 root->objectid, inode_id,
5376                                 root->fs_info->nodesize);
5377                 }
5378
5379                 if (isize != size) {
5380                         if (repair)
5381                                 ret = repair_dir_isize_lowmem(root, path,
5382                                                               inode_id, size);
5383                         if (!repair || ret) {
5384                                 err |= ISIZE_ERROR;
5385                                 error(
5386                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5387                                       root->objectid, inode_id, isize, size);
5388                         }
5389                 }
5390         } else {
5391                 if (nlink != refs) {
5392                         err |= LINK_COUNT_ERROR;
5393                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5394                               root->objectid, inode_id, nlink, refs);
5395                 } else if (!nlink) {
5396                         if (repair)
5397                                 ret = repair_inode_orphan_item_lowmem(root,
5398                                                               path, inode_id);
5399                         if (!repair || ret) {
5400                                 err |= ORPHAN_ITEM;
5401                                 error("root %llu INODE[%llu] is orphan item",
5402                                       root->objectid, inode_id);
5403                         }
5404                 }
5405
5406                 if (!nbytes && !no_holes && extent_end < isize) {
5407                         err |= NBYTES_ERROR;
5408                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5409                               root->objectid, inode_id, isize);
5410                 }
5411
5412                 if (nbytes != extent_size) {
5413                         if (repair)
5414                                 ret = repair_inode_nbytes_lowmem(root, path,
5415                                                          inode_id, extent_size);
5416                         if (!repair || ret) {
5417                                 err |= NBYTES_ERROR;
5418                                 error(
5419         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5420                                       root->objectid, inode_id, nbytes,
5421                                       extent_size);
5422                         }
5423                 }
5424         }
5425
5426         return err;
5427 }
5428
5429 /*
5430  * check first root dir's inode_item and inode_ref
5431  *
5432  * returns 0 means no error
5433  * returns >0 means error
5434  * returns <0 means fatal error
5435  */
5436 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5437 {
5438         struct btrfs_path path;
5439         struct btrfs_key key;
5440         struct btrfs_inode_item *ii;
5441         u64 index;
5442         u32 mode;
5443         int err = 0;
5444         int ret;
5445
5446         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5447         key.type = BTRFS_INODE_ITEM_KEY;
5448         key.offset = 0;
5449
5450         /* For root being dropped, we don't need to check first inode */
5451         if (btrfs_root_refs(&root->root_item) == 0 &&
5452             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5453             BTRFS_FIRST_FREE_OBJECTID)
5454                 return 0;
5455
5456         btrfs_init_path(&path);
5457         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5458         if (ret < 0)
5459                 goto out;
5460         if (ret > 0) {
5461                 ret = 0;
5462                 err |= INODE_ITEM_MISSING;
5463         } else {
5464                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5465                                     struct btrfs_inode_item);
5466                 mode = btrfs_inode_mode(path.nodes[0], ii);
5467                 if (imode_to_type(mode) != BTRFS_FT_DIR)
5468                         err |= INODE_ITEM_MISMATCH;
5469         }
5470
5471         /* lookup first inode ref */
5472         key.offset = BTRFS_FIRST_FREE_OBJECTID;
5473         key.type = BTRFS_INODE_REF_KEY;
5474         /* special index value */
5475         index = 0;
5476
5477         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5478         if (ret < 0)
5479                 goto out;
5480         err |= ret;
5481
5482 out:
5483         btrfs_release_path(&path);
5484         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5485                 error("root dir INODE_ITEM is %s",
5486                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5487         if (err & INODE_REF_MISSING)
5488                 error("root dir INODE_REF is missing");
5489
5490         return ret < 0 ? ret : err;
5491 }
5492
5493 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5494                                                 u64 parent, u64 root)
5495 {
5496         struct rb_node *node;
5497         struct tree_backref *back = NULL;
5498         struct tree_backref match = {
5499                 .node = {
5500                         .is_data = 0,
5501                 },
5502         };
5503
5504         if (parent) {
5505                 match.parent = parent;
5506                 match.node.full_backref = 1;
5507         } else {
5508                 match.root = root;
5509         }
5510
5511         node = rb_search(&rec->backref_tree, &match.node.node,
5512                          (rb_compare_keys)compare_extent_backref, NULL);
5513         if (node)
5514                 back = to_tree_backref(rb_node_to_extent_backref(node));
5515
5516         return back;
5517 }
5518
5519 static struct data_backref *find_data_backref(struct extent_record *rec,
5520                                                 u64 parent, u64 root,
5521                                                 u64 owner, u64 offset,
5522                                                 int found_ref,
5523                                                 u64 disk_bytenr, u64 bytes)
5524 {
5525         struct rb_node *node;
5526         struct data_backref *back = NULL;
5527         struct data_backref match = {
5528                 .node = {
5529                         .is_data = 1,
5530                 },
5531                 .owner = owner,
5532                 .offset = offset,
5533                 .bytes = bytes,
5534                 .found_ref = found_ref,
5535                 .disk_bytenr = disk_bytenr,
5536         };
5537
5538         if (parent) {
5539                 match.parent = parent;
5540                 match.node.full_backref = 1;
5541         } else {
5542                 match.root = root;
5543         }
5544
5545         node = rb_search(&rec->backref_tree, &match.node.node,
5546                          (rb_compare_keys)compare_extent_backref, NULL);
5547         if (node)
5548                 back = to_data_backref(rb_node_to_extent_backref(node));
5549
5550         return back;
5551 }
5552 /*
5553  * Iterate all item on the tree and call check_inode_item() to check.
5554  *
5555  * @root:       the root of the tree to be checked.
5556  * @ext_ref:    the EXTENDED_IREF feature
5557  *
5558  * Return 0 if no error found.
5559  * Return <0 for error.
5560  */
5561 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5562 {
5563         struct btrfs_path path;
5564         struct node_refs nrefs;
5565         struct btrfs_root_item *root_item = &root->root_item;
5566         int ret;
5567         int level;
5568         int err = 0;
5569
5570         /*
5571          * We need to manually check the first inode item(256)
5572          * As the following traversal function will only start from
5573          * the first inode item in the leaf, if inode item(256) is missing
5574          * we will just skip it forever.
5575          */
5576         ret = check_fs_first_inode(root, ext_ref);
5577         if (ret < 0)
5578                 return ret;
5579         err |= !!ret;
5580
5581         memset(&nrefs, 0, sizeof(nrefs));
5582         level = btrfs_header_level(root->node);
5583         btrfs_init_path(&path);
5584
5585         if (btrfs_root_refs(root_item) > 0 ||
5586             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5587                 path.nodes[level] = root->node;
5588                 path.slots[level] = 0;
5589                 extent_buffer_get(root->node);
5590         } else {
5591                 struct btrfs_key key;
5592
5593                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5594                 level = root_item->drop_level;
5595                 path.lowest_level = level;
5596                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5597                 if (ret < 0)
5598                         goto out;
5599                 ret = 0;
5600         }
5601
5602         while (1) {
5603                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5604                 err |= !!ret;
5605
5606                 /* if ret is negative, walk shall stop */
5607                 if (ret < 0) {
5608                         ret = err;
5609                         break;
5610                 }
5611
5612                 ret = walk_up_tree_v2(root, &path, &level);
5613                 if (ret != 0) {
5614                         /* Normal exit, reset ret to err */
5615                         ret = err;
5616                         break;
5617                 }
5618         }
5619
5620 out:
5621         btrfs_release_path(&path);
5622         return ret;
5623 }
5624
5625 /*
5626  * Find the relative ref for root_ref and root_backref.
5627  *
5628  * @root:       the root of the root tree.
5629  * @ref_key:    the key of the root ref.
5630  *
5631  * Return 0 if no error occurred.
5632  */
5633 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5634                           struct extent_buffer *node, int slot)
5635 {
5636         struct btrfs_path path;
5637         struct btrfs_key key;
5638         struct btrfs_root_ref *ref;
5639         struct btrfs_root_ref *backref;
5640         char ref_name[BTRFS_NAME_LEN] = {0};
5641         char backref_name[BTRFS_NAME_LEN] = {0};
5642         u64 ref_dirid;
5643         u64 ref_seq;
5644         u32 ref_namelen;
5645         u64 backref_dirid;
5646         u64 backref_seq;
5647         u32 backref_namelen;
5648         u32 len;
5649         int ret;
5650         int err = 0;
5651
5652         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5653         ref_dirid = btrfs_root_ref_dirid(node, ref);
5654         ref_seq = btrfs_root_ref_sequence(node, ref);
5655         ref_namelen = btrfs_root_ref_name_len(node, ref);
5656
5657         if (ref_namelen <= BTRFS_NAME_LEN) {
5658                 len = ref_namelen;
5659         } else {
5660                 len = BTRFS_NAME_LEN;
5661                 warning("%s[%llu %llu] ref_name too long",
5662                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5663                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5664                         ref_key->offset);
5665         }
5666         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5667
5668         /* Find relative root_ref */
5669         key.objectid = ref_key->offset;
5670         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5671         key.offset = ref_key->objectid;
5672
5673         btrfs_init_path(&path);
5674         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5675         if (ret) {
5676                 err |= ROOT_REF_MISSING;
5677                 error("%s[%llu %llu] couldn't find relative ref",
5678                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5679                       "ROOT_REF" : "ROOT_BACKREF",
5680                       ref_key->objectid, ref_key->offset);
5681                 goto out;
5682         }
5683
5684         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5685                                  struct btrfs_root_ref);
5686         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5687         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5688         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5689
5690         if (backref_namelen <= BTRFS_NAME_LEN) {
5691                 len = backref_namelen;
5692         } else {
5693                 len = BTRFS_NAME_LEN;
5694                 warning("%s[%llu %llu] ref_name too long",
5695                         key.type == BTRFS_ROOT_REF_KEY ?
5696                         "ROOT_REF" : "ROOT_BACKREF",
5697                         key.objectid, key.offset);
5698         }
5699         read_extent_buffer(path.nodes[0], backref_name,
5700                            (unsigned long)(backref + 1), len);
5701
5702         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5703             ref_namelen != backref_namelen ||
5704             strncmp(ref_name, backref_name, len)) {
5705                 err |= ROOT_REF_MISMATCH;
5706                 error("%s[%llu %llu] mismatch relative ref",
5707                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5708                       "ROOT_REF" : "ROOT_BACKREF",
5709                       ref_key->objectid, ref_key->offset);
5710         }
5711 out:
5712         btrfs_release_path(&path);
5713         return err;
5714 }
5715
5716 /*
5717  * Check all fs/file tree in low_memory mode.
5718  *
5719  * 1. for fs tree root item, call check_fs_root_v2()
5720  * 2. for fs tree root ref/backref, call check_root_ref()
5721  *
5722  * Return 0 if no error occurred.
5723  */
5724 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5725 {
5726         struct btrfs_root *tree_root = fs_info->tree_root;
5727         struct btrfs_root *cur_root = NULL;
5728         struct btrfs_path path;
5729         struct btrfs_key key;
5730         struct extent_buffer *node;
5731         unsigned int ext_ref;
5732         int slot;
5733         int ret;
5734         int err = 0;
5735
5736         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5737
5738         btrfs_init_path(&path);
5739         key.objectid = BTRFS_FS_TREE_OBJECTID;
5740         key.offset = 0;
5741         key.type = BTRFS_ROOT_ITEM_KEY;
5742
5743         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5744         if (ret < 0) {
5745                 err = ret;
5746                 goto out;
5747         } else if (ret > 0) {
5748                 err = -ENOENT;
5749                 goto out;
5750         }
5751
5752         while (1) {
5753                 node = path.nodes[0];
5754                 slot = path.slots[0];
5755                 btrfs_item_key_to_cpu(node, &key, slot);
5756                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5757                         goto out;
5758                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5759                     fs_root_objectid(key.objectid)) {
5760                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5761                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5762                                                                        &key);
5763                         } else {
5764                                 key.offset = (u64)-1;
5765                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5766                         }
5767
5768                         if (IS_ERR(cur_root)) {
5769                                 error("Fail to read fs/subvol tree: %lld",
5770                                       key.objectid);
5771                                 err = -EIO;
5772                                 goto next;
5773                         }
5774
5775                         ret = check_fs_root_v2(cur_root, ext_ref);
5776                         err |= ret;
5777
5778                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5779                                 btrfs_free_fs_root(cur_root);
5780                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5781                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5782                         ret = check_root_ref(tree_root, &key, node, slot);
5783                         err |= ret;
5784                 }
5785 next:
5786                 ret = btrfs_next_item(tree_root, &path);
5787                 if (ret > 0)
5788                         goto out;
5789                 if (ret < 0) {
5790                         err = ret;
5791                         goto out;
5792                 }
5793         }
5794
5795 out:
5796         btrfs_release_path(&path);
5797         return err;
5798 }
5799
5800 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5801                           struct cache_tree *root_cache)
5802 {
5803         int ret;
5804
5805         if (!ctx.progress_enabled)
5806                 fprintf(stderr, "checking fs roots\n");
5807         if (check_mode == CHECK_MODE_LOWMEM)
5808                 ret = check_fs_roots_v2(fs_info);
5809         else
5810                 ret = check_fs_roots(fs_info, root_cache);
5811
5812         return ret;
5813 }
5814
5815 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5816 {
5817         struct extent_backref *back, *tmp;
5818         struct tree_backref *tback;
5819         struct data_backref *dback;
5820         u64 found = 0;
5821         int err = 0;
5822
5823         rbtree_postorder_for_each_entry_safe(back, tmp,
5824                                              &rec->backref_tree, node) {
5825                 if (!back->found_extent_tree) {
5826                         err = 1;
5827                         if (!print_errs)
5828                                 goto out;
5829                         if (back->is_data) {
5830                                 dback = to_data_backref(back);
5831                                 fprintf(stderr, "Data backref %llu %s %llu"
5832                                         " owner %llu offset %llu num_refs %lu"
5833                                         " not found in extent tree\n",
5834                                         (unsigned long long)rec->start,
5835                                         back->full_backref ?
5836                                         "parent" : "root",
5837                                         back->full_backref ?
5838                                         (unsigned long long)dback->parent:
5839                                         (unsigned long long)dback->root,
5840                                         (unsigned long long)dback->owner,
5841                                         (unsigned long long)dback->offset,
5842                                         (unsigned long)dback->num_refs);
5843                         } else {
5844                                 tback = to_tree_backref(back);
5845                                 fprintf(stderr, "Tree backref %llu parent %llu"
5846                                         " root %llu not found in extent tree\n",
5847                                         (unsigned long long)rec->start,
5848                                         (unsigned long long)tback->parent,
5849                                         (unsigned long long)tback->root);
5850                         }
5851                 }
5852                 if (!back->is_data && !back->found_ref) {
5853                         err = 1;
5854                         if (!print_errs)
5855                                 goto out;
5856                         tback = to_tree_backref(back);
5857                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5858                                 (unsigned long long)rec->start,
5859                                 back->full_backref ? "parent" : "root",
5860                                 back->full_backref ?
5861                                 (unsigned long long)tback->parent :
5862                                 (unsigned long long)tback->root, back);
5863                 }
5864                 if (back->is_data) {
5865                         dback = to_data_backref(back);
5866                         if (dback->found_ref != dback->num_refs) {
5867                                 err = 1;
5868                                 if (!print_errs)
5869                                         goto out;
5870                                 fprintf(stderr, "Incorrect local backref count"
5871                                         " on %llu %s %llu owner %llu"
5872                                         " offset %llu found %u wanted %u back %p\n",
5873                                         (unsigned long long)rec->start,
5874                                         back->full_backref ?
5875                                         "parent" : "root",
5876                                         back->full_backref ?
5877                                         (unsigned long long)dback->parent:
5878                                         (unsigned long long)dback->root,
5879                                         (unsigned long long)dback->owner,
5880                                         (unsigned long long)dback->offset,
5881                                         dback->found_ref, dback->num_refs, back);
5882                         }
5883                         if (dback->disk_bytenr != rec->start) {
5884                                 err = 1;
5885                                 if (!print_errs)
5886                                         goto out;
5887                                 fprintf(stderr, "Backref disk bytenr does not"
5888                                         " match extent record, bytenr=%llu, "
5889                                         "ref bytenr=%llu\n",
5890                                         (unsigned long long)rec->start,
5891                                         (unsigned long long)dback->disk_bytenr);
5892                         }
5893
5894                         if (dback->bytes != rec->nr) {
5895                                 err = 1;
5896                                 if (!print_errs)
5897                                         goto out;
5898                                 fprintf(stderr, "Backref bytes do not match "
5899                                         "extent backref, bytenr=%llu, ref "
5900                                         "bytes=%llu, backref bytes=%llu\n",
5901                                         (unsigned long long)rec->start,
5902                                         (unsigned long long)rec->nr,
5903                                         (unsigned long long)dback->bytes);
5904                         }
5905                 }
5906                 if (!back->is_data) {
5907                         found += 1;
5908                 } else {
5909                         dback = to_data_backref(back);
5910                         found += dback->found_ref;
5911                 }
5912         }
5913         if (found != rec->refs) {
5914                 err = 1;
5915                 if (!print_errs)
5916                         goto out;
5917                 fprintf(stderr, "Incorrect global backref count "
5918                         "on %llu found %llu wanted %llu\n",
5919                         (unsigned long long)rec->start,
5920                         (unsigned long long)found,
5921                         (unsigned long long)rec->refs);
5922         }
5923 out:
5924         return err;
5925 }
5926
/*
 * Node destructor handed to rb_free_nodes(): frees the extent_backref that
 * embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
5933
5934 static void free_all_extent_backrefs(struct extent_record *rec)
5935 {
5936         rb_free_nodes(&rec->backref_tree, __free_one_backref);
5937 }
5938
5939 static void free_extent_record_cache(struct cache_tree *extent_cache)
5940 {
5941         struct cache_extent *cache;
5942         struct extent_record *rec;
5943
5944         while (1) {
5945                 cache = first_cache_extent(extent_cache);
5946                 if (!cache)
5947                         break;
5948                 rec = container_of(cache, struct extent_record, cache);
5949                 remove_cache_extent(extent_cache, cache);
5950                 free_all_extent_backrefs(rec);
5951                 free(rec);
5952         }
5953 }
5954
5955 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5956                                  struct extent_record *rec)
5957 {
5958         if (rec->content_checked && rec->owner_ref_checked &&
5959             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5960             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5961             !rec->bad_full_backref && !rec->crossing_stripes &&
5962             !rec->wrong_chunk_type) {
5963                 remove_cache_extent(extent_cache, &rec->cache);
5964                 free_all_extent_backrefs(rec);
5965                 list_del_init(&rec->list);
5966                 free(rec);
5967         }
5968         return 0;
5969 }
5970
5971 static int check_owner_ref(struct btrfs_root *root,
5972                             struct extent_record *rec,
5973                             struct extent_buffer *buf)
5974 {
5975         struct extent_backref *node, *tmp;
5976         struct tree_backref *back;
5977         struct btrfs_root *ref_root;
5978         struct btrfs_key key;
5979         struct btrfs_path path;
5980         struct extent_buffer *parent;
5981         int level;
5982         int found = 0;
5983         int ret;
5984
5985         rbtree_postorder_for_each_entry_safe(node, tmp,
5986                                              &rec->backref_tree, node) {
5987                 if (node->is_data)
5988                         continue;
5989                 if (!node->found_ref)
5990                         continue;
5991                 if (node->full_backref)
5992                         continue;
5993                 back = to_tree_backref(node);
5994                 if (btrfs_header_owner(buf) == back->root)
5995                         return 0;
5996         }
5997         BUG_ON(rec->is_root);
5998
5999         /* try to find the block by search corresponding fs tree */
6000         key.objectid = btrfs_header_owner(buf);
6001         key.type = BTRFS_ROOT_ITEM_KEY;
6002         key.offset = (u64)-1;
6003
6004         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6005         if (IS_ERR(ref_root))
6006                 return 1;
6007
6008         level = btrfs_header_level(buf);
6009         if (level == 0)
6010                 btrfs_item_key_to_cpu(buf, &key, 0);
6011         else
6012                 btrfs_node_key_to_cpu(buf, &key, 0);
6013
6014         btrfs_init_path(&path);
6015         path.lowest_level = level + 1;
6016         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6017         if (ret < 0)
6018                 return 0;
6019
6020         parent = path.nodes[level + 1];
6021         if (parent && buf->start == btrfs_node_blockptr(parent,
6022                                                         path.slots[level + 1]))
6023                 found = 1;
6024
6025         btrfs_release_path(&path);
6026         return found ? 0 : 1;
6027 }
6028
6029 static int is_extent_tree_record(struct extent_record *rec)
6030 {
6031         struct extent_backref *node, *tmp;
6032         struct tree_backref *back;
6033         int is_extent = 0;
6034
6035         rbtree_postorder_for_each_entry_safe(node, tmp,
6036                                              &rec->backref_tree, node) {
6037                 if (node->is_data)
6038                         return 0;
6039                 back = to_tree_backref(node);
6040                 if (node->full_backref)
6041                         return 0;
6042                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6043                         is_extent = 1;
6044         }
6045         return is_extent;
6046 }
6047
6048
6049 static int record_bad_block_io(struct btrfs_fs_info *info,
6050                                struct cache_tree *extent_cache,
6051                                u64 start, u64 len)
6052 {
6053         struct extent_record *rec;
6054         struct cache_extent *cache;
6055         struct btrfs_key key;
6056
6057         cache = lookup_cache_extent(extent_cache, start, len);
6058         if (!cache)
6059                 return 0;
6060
6061         rec = container_of(cache, struct extent_record, cache);
6062         if (!is_extent_tree_record(rec))
6063                 return 0;
6064
6065         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6066         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6067 }
6068
6069 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6070                        struct extent_buffer *buf, int slot)
6071 {
6072         if (btrfs_header_level(buf)) {
6073                 struct btrfs_key_ptr ptr1, ptr2;
6074
6075                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6076                                    sizeof(struct btrfs_key_ptr));
6077                 read_extent_buffer(buf, &ptr2,
6078                                    btrfs_node_key_ptr_offset(slot + 1),
6079                                    sizeof(struct btrfs_key_ptr));
6080                 write_extent_buffer(buf, &ptr1,
6081                                     btrfs_node_key_ptr_offset(slot + 1),
6082                                     sizeof(struct btrfs_key_ptr));
6083                 write_extent_buffer(buf, &ptr2,
6084                                     btrfs_node_key_ptr_offset(slot),
6085                                     sizeof(struct btrfs_key_ptr));
6086                 if (slot == 0) {
6087                         struct btrfs_disk_key key;
6088                         btrfs_node_key(buf, &key, 0);
6089                         btrfs_fixup_low_keys(root, path, &key,
6090                                              btrfs_header_level(buf) + 1);
6091                 }
6092         } else {
6093                 struct btrfs_item *item1, *item2;
6094                 struct btrfs_key k1, k2;
6095                 char *item1_data, *item2_data;
6096                 u32 item1_offset, item2_offset, item1_size, item2_size;
6097
6098                 item1 = btrfs_item_nr(slot);
6099                 item2 = btrfs_item_nr(slot + 1);
6100                 btrfs_item_key_to_cpu(buf, &k1, slot);
6101                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6102                 item1_offset = btrfs_item_offset(buf, item1);
6103                 item2_offset = btrfs_item_offset(buf, item2);
6104                 item1_size = btrfs_item_size(buf, item1);
6105                 item2_size = btrfs_item_size(buf, item2);
6106
6107                 item1_data = malloc(item1_size);
6108                 if (!item1_data)
6109                         return -ENOMEM;
6110                 item2_data = malloc(item2_size);
6111                 if (!item2_data) {
6112                         free(item1_data);
6113                         return -ENOMEM;
6114                 }
6115
6116                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6117                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6118
6119                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6120                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6121                 free(item1_data);
6122                 free(item2_data);
6123
6124                 btrfs_set_item_offset(buf, item1, item2_offset);
6125                 btrfs_set_item_offset(buf, item2, item1_offset);
6126                 btrfs_set_item_size(buf, item1, item2_size);
6127                 btrfs_set_item_size(buf, item2, item1_size);
6128
6129                 path->slots[0] = slot;
6130                 btrfs_set_item_key_unsafe(root, path, &k2);
6131                 path->slots[0] = slot + 1;
6132                 btrfs_set_item_key_unsafe(root, path, &k1);
6133         }
6134         return 0;
6135 }
6136
6137 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6138 {
6139         struct extent_buffer *buf;
6140         struct btrfs_key k1, k2;
6141         int i;
6142         int level = path->lowest_level;
6143         int ret = -EIO;
6144
6145         buf = path->nodes[level];
6146         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6147                 if (level) {
6148                         btrfs_node_key_to_cpu(buf, &k1, i);
6149                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6150                 } else {
6151                         btrfs_item_key_to_cpu(buf, &k1, i);
6152                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6153                 }
6154                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6155                         continue;
6156                 ret = swap_values(root, path, buf, i);
6157                 if (ret)
6158                         break;
6159                 btrfs_mark_buffer_dirty(buf);
6160                 i = 0;
6161         }
6162         return ret;
6163 }
6164
6165 static int delete_bogus_item(struct btrfs_root *root,
6166                              struct btrfs_path *path,
6167                              struct extent_buffer *buf, int slot)
6168 {
6169         struct btrfs_key key;
6170         int nritems = btrfs_header_nritems(buf);
6171
6172         btrfs_item_key_to_cpu(buf, &key, slot);
6173
6174         /* These are all the keys we can deal with missing. */
6175         if (key.type != BTRFS_DIR_INDEX_KEY &&
6176             key.type != BTRFS_EXTENT_ITEM_KEY &&
6177             key.type != BTRFS_METADATA_ITEM_KEY &&
6178             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6179             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6180                 return -1;
6181
6182         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6183                (unsigned long long)key.objectid, key.type,
6184                (unsigned long long)key.offset, slot, buf->start);
6185         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6186                               btrfs_item_nr_offset(slot + 1),
6187                               sizeof(struct btrfs_item) *
6188                               (nritems - slot - 1));
6189         btrfs_set_header_nritems(buf, nritems - 1);
6190         if (slot == 0) {
6191                 struct btrfs_disk_key disk_key;
6192
6193                 btrfs_item_key(buf, &disk_key, 0);
6194                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6195         }
6196         btrfs_mark_buffer_dirty(buf);
6197         return 0;
6198 }
6199
/*
 * Repair the item data offsets of the leaf at path->nodes[0] so that the item
 * data is packed back-to-back from the end of the leaf data area.
 *
 * The data of slot 0 must end at BTRFS_LEAF_DATA_SIZE(root) and the data of
 * every later slot must end where the data of the previous slot starts.  An
 * item whose end lies beyond that limit is handed to delete_bogus_item() and
 * the scan restarts; an item that leaves a gap is shifted up to close it.
 *
 * Returns 0 on success.  The failure paths set -EIO but then hit BUG_ON(ret),
 * so an error is never actually returned to the caller.
 */
static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
{
        struct extent_buffer *buf;
        int i;
        int ret = 0;

        /* We should only get this for leaves */
        BUG_ON(path->lowest_level);
        buf = path->nodes[0];
again:
        for (i = 0; i < btrfs_header_nritems(buf); i++) {
                unsigned int shift = 0, offset;

                if (i == 0 && btrfs_item_end_nr(buf, i) !=
                    BTRFS_LEAF_DATA_SIZE(root)) {
                        /* First item: its data must end at the leaf end. */
                        if (btrfs_item_end_nr(buf, i) >
                            BTRFS_LEAF_DATA_SIZE(root)) {
                                /*
                                 * Data runs past the leaf.  Try to drop the
                                 * item and rescan from the start, since the
                                 * slots have been renumbered.
                                 */
                                ret = delete_bogus_item(root, path, buf, i);
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "item is off the end of the "
                                        "leaf, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        /* Gap between the item end and the leaf end. */
                        shift = BTRFS_LEAF_DATA_SIZE(root) -
                                btrfs_item_end_nr(buf, i);
                } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
                           btrfs_item_offset_nr(buf, i - 1)) {
                        /* Later items: data must end where slot i - 1 starts. */
                        if (btrfs_item_end_nr(buf, i) >
                            btrfs_item_offset_nr(buf, i - 1)) {
                                /* Overlaps the previous item's data. */
                                ret = delete_bogus_item(root, path, buf, i);
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "items overlap, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        /* Gap between this item and the previous one. */
                        shift = btrfs_item_offset_nr(buf, i - 1) -
                                btrfs_item_end_nr(buf, i);
                }
                if (!shift)
                        continue;

                printf("Shifting item nr %d by %u bytes in block %llu\n",
                       i, shift, (unsigned long long)buf->start);
                offset = btrfs_item_offset_nr(buf, i);
                /* Move the item data up by @shift, then record the new offset. */
                memmove_extent_buffer(buf,
                                      btrfs_leaf_data(buf) + offset + shift,
                                      btrfs_leaf_data(buf) + offset,
                                      btrfs_item_size_nr(buf, i));
                btrfs_set_item_offset(buf, btrfs_item_nr(i),
                                      offset + shift);
                btrfs_mark_buffer_dirty(buf);
        }

        /*
         * We may have moved things, in which case we want to exit so we don't
         * write those changes out.  Once we have proper abort functionality in
         * progs this can be changed to something nicer.
         */
        BUG_ON(ret);
        return ret;
}
6264
6265 /*
6266  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6267  * then just return -EIO.
6268  */
6269 static int try_to_fix_bad_block(struct btrfs_root *root,
6270                                 struct extent_buffer *buf,
6271                                 enum btrfs_tree_block_status status)
6272 {
6273         struct btrfs_trans_handle *trans;
6274         struct ulist *roots;
6275         struct ulist_node *node;
6276         struct btrfs_root *search_root;
6277         struct btrfs_path path;
6278         struct ulist_iterator iter;
6279         struct btrfs_key root_key, key;
6280         int ret;
6281
6282         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6283             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6284                 return -EIO;
6285
6286         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6287         if (ret)
6288                 return -EIO;
6289
6290         btrfs_init_path(&path);
6291         ULIST_ITER_INIT(&iter);
6292         while ((node = ulist_next(roots, &iter))) {
6293                 root_key.objectid = node->val;
6294                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6295                 root_key.offset = (u64)-1;
6296
6297                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6298                 if (IS_ERR(root)) {
6299                         ret = -EIO;
6300                         break;
6301                 }
6302
6303
6304                 trans = btrfs_start_transaction(search_root, 0);
6305                 if (IS_ERR(trans)) {
6306                         ret = PTR_ERR(trans);
6307                         break;
6308                 }
6309
6310                 path.lowest_level = btrfs_header_level(buf);
6311                 path.skip_check_block = 1;
6312                 if (path.lowest_level)
6313                         btrfs_node_key_to_cpu(buf, &key, 0);
6314                 else
6315                         btrfs_item_key_to_cpu(buf, &key, 0);
6316                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6317                 if (ret) {
6318                         ret = -EIO;
6319                         btrfs_commit_transaction(trans, search_root);
6320                         break;
6321                 }
6322                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6323                         ret = fix_key_order(search_root, &path);
6324                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6325                         ret = fix_item_offset(search_root, &path);
6326                 if (ret) {
6327                         btrfs_commit_transaction(trans, search_root);
6328                         break;
6329                 }
6330                 btrfs_release_path(&path);
6331                 btrfs_commit_transaction(trans, search_root);
6332         }
6333         ulist_free(roots);
6334         btrfs_release_path(&path);
6335         return ret;
6336 }
6337
6338 static int check_block(struct btrfs_root *root,
6339                        struct cache_tree *extent_cache,
6340                        struct extent_buffer *buf, u64 flags)
6341 {
6342         struct extent_record *rec;
6343         struct cache_extent *cache;
6344         struct btrfs_key key;
6345         enum btrfs_tree_block_status status;
6346         int ret = 0;
6347         int level;
6348
6349         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6350         if (!cache)
6351                 return 1;
6352         rec = container_of(cache, struct extent_record, cache);
6353         rec->generation = btrfs_header_generation(buf);
6354
6355         level = btrfs_header_level(buf);
6356         if (btrfs_header_nritems(buf) > 0) {
6357
6358                 if (level == 0)
6359                         btrfs_item_key_to_cpu(buf, &key, 0);
6360                 else
6361                         btrfs_node_key_to_cpu(buf, &key, 0);
6362
6363                 rec->info_objectid = key.objectid;
6364         }
6365         rec->info_level = level;
6366
6367         if (btrfs_is_leaf(buf))
6368                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6369         else
6370                 status = btrfs_check_node(root, &rec->parent_key, buf);
6371
6372         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6373                 if (repair)
6374                         status = try_to_fix_bad_block(root, buf, status);
6375                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6376                         ret = -EIO;
6377                         fprintf(stderr, "bad block %llu\n",
6378                                 (unsigned long long)buf->start);
6379                 } else {
6380                         /*
6381                          * Signal to callers we need to start the scan over
6382                          * again since we'll have cowed blocks.
6383                          */
6384                         ret = -EAGAIN;
6385                 }
6386         } else {
6387                 rec->content_checked = 1;
6388                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6389                         rec->owner_ref_checked = 1;
6390                 else {
6391                         ret = check_owner_ref(root, rec, buf);
6392                         if (!ret)
6393                                 rec->owner_ref_checked = 1;
6394                 }
6395         }
6396         if (!ret)
6397                 maybe_free_extent_rec(extent_cache, rec);
6398         return ret;
6399 }
6400
/*
 * Compiled out (#if 0): linear search of the rec->backrefs list for a tree
 * backref.  With a non-zero @parent only full backrefs are considered and
 * matched on the parent; otherwise only non-full backrefs are considered and
 * matched on @root.  Returns the matching backref or NULL.
 *
 * NOTE(review): add_tree_backref() still calls find_tree_backref(), so a live
 * definition presumably exists elsewhere in this file -- confirm before
 * removing this copy.
 */
#if 0
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct tree_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                cur = cur->next;
                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root)
                                return back;
                }
        }
        return NULL;
}
#endif
6430
6431 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6432                                                 u64 parent, u64 root)
6433 {
6434         struct tree_backref *ref = malloc(sizeof(*ref));
6435
6436         if (!ref)
6437                 return NULL;
6438         memset(&ref->node, 0, sizeof(ref->node));
6439         if (parent > 0) {
6440                 ref->parent = parent;
6441                 ref->node.full_backref = 1;
6442         } else {
6443                 ref->root = root;
6444                 ref->node.full_backref = 0;
6445         }
6446
6447         return ref;
6448 }
6449
/*
 * Compiled out (#if 0): linear search of the rec->backrefs list for a data
 * backref.  With a non-zero @parent only full backrefs are considered and
 * matched on the parent; otherwise non-full backrefs are matched on
 * @root/@owner/@offset.  When @found_ref is set and the candidate already has
 * found_ref, it is skipped unless its bytes and disk_bytenr also match.
 * Returns the matching backref or NULL.
 *
 * NOTE(review): add_data_backref() still calls find_data_backref(), so a live
 * definition presumably exists elsewhere in this file -- confirm before
 * removing this copy.
 */
#if 0
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct data_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                cur = cur->next;
                if (!node->is_data)
                        continue;
                back = to_data_backref(node);
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root && back->owner == owner &&
                            back->offset == offset) {
                                if (found_ref && node->found_ref &&
                                    (back->bytes != bytes ||
                                    back->disk_bytenr != disk_bytenr))
                                        continue;
                                return back;
                        }
                }
        }
        return NULL;
}
#endif
6488
6489 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6490                                                 u64 parent, u64 root,
6491                                                 u64 owner, u64 offset,
6492                                                 u64 max_size)
6493 {
6494         struct data_backref *ref = malloc(sizeof(*ref));
6495
6496         if (!ref)
6497                 return NULL;
6498         memset(&ref->node, 0, sizeof(ref->node));
6499         ref->node.is_data = 1;
6500
6501         if (parent > 0) {
6502                 ref->parent = parent;
6503                 ref->owner = 0;
6504                 ref->offset = 0;
6505                 ref->node.full_backref = 1;
6506         } else {
6507                 ref->root = root;
6508                 ref->owner = owner;
6509                 ref->offset = offset;
6510                 ref->node.full_backref = 0;
6511         }
6512         ref->bytes = max_size;
6513         ref->found_ref = 0;
6514         ref->num_refs = 0;
6515         if (max_size > rec->max_size)
6516                 rec->max_size = max_size;
6517         return ref;
6518 }
6519
6520 /* Check if the type of extent matches with its chunk */
6521 static void check_extent_type(struct extent_record *rec)
6522 {
6523         struct btrfs_block_group_cache *bg_cache;
6524
6525         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6526         if (!bg_cache)
6527                 return;
6528
6529         /* data extent, check chunk directly*/
6530         if (!rec->metadata) {
6531                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6532                         rec->wrong_chunk_type = 1;
6533                 return;
6534         }
6535
6536         /* metadata extent, check the obvious case first */
6537         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6538                                  BTRFS_BLOCK_GROUP_METADATA))) {
6539                 rec->wrong_chunk_type = 1;
6540                 return;
6541         }
6542
6543         /*
6544          * Check SYSTEM extent, as it's also marked as metadata, we can only
6545          * make sure it's a SYSTEM extent by its backref
6546          */
6547         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6548                 struct extent_backref *node;
6549                 struct tree_backref *tback;
6550                 u64 bg_type;
6551
6552                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6553                 if (node->is_data) {
6554                         /* tree block shouldn't have data backref */
6555                         rec->wrong_chunk_type = 1;
6556                         return;
6557                 }
6558                 tback = container_of(node, struct tree_backref, node);
6559
6560                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6561                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6562                 else
6563                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6564                 if (!(bg_cache->flags & bg_type))
6565                         rec->wrong_chunk_type = 1;
6566         }
6567 }
6568
/*
 * Allocate a new extent record, fill default values from @tmpl and insert it
 * into @extent_cache. Caller is supposed to make sure the [start,nr) is not in
 * the cache, otherwise it fails.
 *
 * On success the global bytes_used counter grows by the record's size, and the
 * record's stripe crossing (metadata only) and chunk type are checked.
 *
 * Returns 0 on success, -ENOMEM if the record cannot be allocated, or the
 * error from insert_cache_extent().
 */
static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
                struct extent_record *tmpl)
{
        struct extent_record *rec;
        int ret = 0;

        BUG_ON(tmpl->max_size == 0);
        rec = malloc(sizeof(*rec));
        if (!rec)
                return -ENOMEM;
        rec->start = tmpl->start;
        rec->max_size = tmpl->max_size;
        /* The record size is the larger of the template's nr and max_size. */
        rec->nr = max(tmpl->nr, tmpl->max_size);
        rec->found_rec = tmpl->found_rec;
        rec->content_checked = tmpl->content_checked;
        rec->owner_ref_checked = tmpl->owner_ref_checked;
        rec->num_duplicates = 0;
        rec->metadata = tmpl->metadata;
        rec->flag_block_full_backref = FLAG_UNSET;
        rec->bad_full_backref = 0;
        rec->crossing_stripes = 0;
        rec->wrong_chunk_type = 0;
        rec->is_root = tmpl->is_root;
        rec->refs = tmpl->refs;
        rec->extent_item_refs = tmpl->extent_item_refs;
        rec->parent_generation = tmpl->parent_generation;
        INIT_LIST_HEAD(&rec->backrefs);
        INIT_LIST_HEAD(&rec->dups);
        INIT_LIST_HEAD(&rec->list);
        rec->backref_tree = RB_ROOT;
        memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
        /* The cache entry is sized by tmpl->nr, not by rec->nr computed above. */
        rec->cache.start = tmpl->start;
        rec->cache.size = tmpl->nr;
        ret = insert_cache_extent(extent_cache, &rec->cache);
        if (ret) {
                free(rec);
                return ret;
        }
        bytes_used += rec->nr;

        if (tmpl->metadata)
                rec->crossing_stripes = check_crossing_stripes(global_info,
                                rec->start, global_info->nodesize);
        check_extent_type(rec);
        return ret;
}
6620
/*
 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
 * some are hints:
 * - refs              - if found, increase refs
 * - is_root           - if found, set
 * - content_checked   - if found, set
 * - owner_ref_checked - if found, set
 *
 * If not found, create a new one, initialize and insert.
 *
 * Returns 0 on success, -ENOMEM if a duplicate record cannot be allocated, or
 * the result of add_extent_rec_nolookup() when a new record is created.
 */
static int add_extent_rec(struct cache_tree *extent_cache,
                struct extent_record *tmpl)
{
        struct extent_record *rec;
        struct cache_extent *cache;
        int ret = 0;
        int dup = 0;

        cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
        if (cache) {
                rec = container_of(cache, struct extent_record, cache);
                if (tmpl->refs)
                        rec->refs++;
                /*
                 * Records created from a backref alone use a template with
                 * nr == 1 (see add_tree_backref() and add_data_backref());
                 * replace such a size with the one given here.
                 */
                if (rec->nr == 1)
                        rec->nr = max(tmpl->nr, tmpl->max_size);

                /*
                 * We need to make sure to reset nr to whatever the extent
                 * record says was the real size, this way we can compare it to
                 * the backrefs.
                 */
                if (tmpl->found_rec) {
                        if (tmpl->start != rec->start || rec->found_rec) {
                                struct extent_record *tmp;

                                /*
                                 * A second extent item covers this range:
                                 * queue the record on duplicate_extents once
                                 * and remember the new item on rec->dups.
                                 */
                                dup = 1;
                                if (list_empty(&rec->list))
                                        list_add_tail(&rec->list,
                                                      &duplicate_extents);

                                /*
                                 * We have to do this song and dance in case we
                                 * find an extent record that falls inside of
                                 * our current extent record but does not have
                                 * the same objectid.
                                 */
                                tmp = malloc(sizeof(*tmp));
                                /*
                                 * NOTE(review): on this failure rec may
                                 * already have been queued on
                                 * duplicate_extents above.
                                 */
                                if (!tmp)
                                        return -ENOMEM;
                                /*
                                 * NOTE(review): only the fields set below are
                                 * initialized in the duplicate; the rest keep
                                 * whatever malloc() returned.
                                 */
                                tmp->start = tmpl->start;
                                tmp->max_size = tmpl->max_size;
                                tmp->nr = tmpl->nr;
                                tmp->found_rec = 1;
                                tmp->metadata = tmpl->metadata;
                                tmp->extent_item_refs = tmpl->extent_item_refs;
                                INIT_LIST_HEAD(&tmp->list);
                                list_add_tail(&tmp->list, &rec->dups);
                                rec->num_duplicates++;
                        } else {
                                rec->nr = tmpl->nr;
                                rec->found_rec = 1;
                        }
                }

                /* Duplicates keep their own count and must not overwrite ours. */
                if (tmpl->extent_item_refs && !dup) {
                        if (rec->extent_item_refs) {
                                fprintf(stderr, "block %llu rec "
                                        "extent_item_refs %llu, passed %llu\n",
                                        (unsigned long long)tmpl->start,
                                        (unsigned long long)
                                                        rec->extent_item_refs,
                                        (unsigned long long)tmpl->extent_item_refs);
                        }
                        rec->extent_item_refs = tmpl->extent_item_refs;
                }
                if (tmpl->is_root)
                        rec->is_root = 1;
                if (tmpl->content_checked)
                        rec->content_checked = 1;
                if (tmpl->owner_ref_checked)
                        rec->owner_ref_checked = 1;
                /* The parent key is always taken from the template. */
                memcpy(&rec->parent_key, &tmpl->parent_key,
                                sizeof(tmpl->parent_key));
                if (tmpl->parent_generation)
                        rec->parent_generation = tmpl->parent_generation;
                if (rec->max_size < tmpl->max_size)
                        rec->max_size = tmpl->max_size;

                /*
                 * A metadata extent can't cross stripe_len boundary, otherwise
                 * kernel scrub won't be able to handle it.
                 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
                 * it.
                 */
                if (tmpl->metadata)
                        rec->crossing_stripes = check_crossing_stripes(
                                        global_info, rec->start,
                                        global_info->nodesize);
                check_extent_type(rec);
                maybe_free_extent_rec(extent_cache, rec);
                return ret;
        }

        /* Nothing cached for this range yet: create a fresh record. */
        ret = add_extent_rec_nolookup(extent_cache, tmpl);

        return ret;
}
6728
6729 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6730                             u64 parent, u64 root, int found_ref)
6731 {
6732         struct extent_record *rec;
6733         struct tree_backref *back;
6734         struct cache_extent *cache;
6735         int ret;
6736         bool insert = false;
6737
6738         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6739         if (!cache) {
6740                 struct extent_record tmpl;
6741
6742                 memset(&tmpl, 0, sizeof(tmpl));
6743                 tmpl.start = bytenr;
6744                 tmpl.nr = 1;
6745                 tmpl.metadata = 1;
6746                 tmpl.max_size = 1;
6747
6748                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6749                 if (ret)
6750                         return ret;
6751
6752                 /* really a bug in cache_extent implement now */
6753                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6754                 if (!cache)
6755                         return -ENOENT;
6756         }
6757
6758         rec = container_of(cache, struct extent_record, cache);
6759         if (rec->start != bytenr) {
6760                 /*
6761                  * Several cause, from unaligned bytenr to over lapping extents
6762                  */
6763                 return -EEXIST;
6764         }
6765
6766         back = find_tree_backref(rec, parent, root);
6767         if (!back) {
6768                 back = alloc_tree_backref(rec, parent, root);
6769                 if (!back)
6770                         return -ENOMEM;
6771                 insert = true;
6772         }
6773
6774         if (found_ref) {
6775                 if (back->node.found_ref) {
6776                         fprintf(stderr, "Extent back ref already exists "
6777                                 "for %llu parent %llu root %llu \n",
6778                                 (unsigned long long)bytenr,
6779                                 (unsigned long long)parent,
6780                                 (unsigned long long)root);
6781                 }
6782                 back->node.found_ref = 1;
6783         } else {
6784                 if (back->node.found_extent_tree) {
6785                         fprintf(stderr, "Extent back ref already exists "
6786                                 "for %llu parent %llu root %llu \n",
6787                                 (unsigned long long)bytenr,
6788                                 (unsigned long long)parent,
6789                                 (unsigned long long)root);
6790                 }
6791                 back->node.found_extent_tree = 1;
6792         }
6793         if (insert)
6794                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6795                         compare_extent_backref));
6796         check_extent_type(rec);
6797         maybe_free_extent_rec(extent_cache, rec);
6798         return 0;
6799 }
6800
/*
 * Record a data backref for the extent starting at @bytenr.
 *
 * If no extent record covers @bytenr a placeholder record of size 1 with the
 * given @max_size is created first.  The matching backref is looked up or
 * allocated.  With @found_ref set it is marked as found, its size and disk
 * bytenr are updated, and the record's ref count is bumped; otherwise it is
 * marked as found_extent_tree and takes @num_refs.
 *
 * Returns 0 on success or the error from add_extent_rec_nolookup().
 *
 * NOTE(review): unlike add_tree_backref(), a failed re-lookup calls abort()
 * and a failed allocation hits BUG_ON() instead of returning an error, and
 * rec->start is not compared against @bytenr.
 */
static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
                            u64 parent, u64 root, u64 owner, u64 offset,
                            u32 num_refs, int found_ref, u64 max_size)
{
        struct extent_record *rec;
        struct data_backref *back;
        struct cache_extent *cache;
        int ret;
        bool insert = false;

        cache = lookup_cache_extent(extent_cache, bytenr, 1);
        if (!cache) {
                struct extent_record tmpl;

                /* No record yet: create a placeholder to hang the backref on. */
                memset(&tmpl, 0, sizeof(tmpl));
                tmpl.start = bytenr;
                tmpl.nr = 1;
                tmpl.max_size = max_size;

                ret = add_extent_rec_nolookup(extent_cache, &tmpl);
                if (ret)
                        return ret;

                cache = lookup_cache_extent(extent_cache, bytenr, 1);
                if (!cache)
                        abort();
        }

        rec = container_of(cache, struct extent_record, cache);
        if (rec->max_size < max_size)
                rec->max_size = max_size;

        /*
         * If found_ref is set then max_size is the real size and must match the
         * existing refs.  So if we have already found a ref then we need to
         * make sure that this ref matches the existing one, otherwise we need
         * to add a new backref so we can notice that the backrefs don't match
         * and we need to figure out who is telling the truth.  This is to
         * account for that awful fsync bug I introduced where we'd end up with
         * a btrfs_file_extent_item that would have its length include multiple
         * prealloc extents or point inside of a prealloc extent.
         */
        back = find_data_backref(rec, parent, root, owner, offset, found_ref,
                                 bytenr, max_size);
        if (!back) {
                back = alloc_data_backref(rec, parent, root, owner, offset,
                                          max_size);
                BUG_ON(!back);
                insert = true;
        }

        if (found_ref) {
                BUG_ON(num_refs != 1);
                if (back->node.found_ref)
                        BUG_ON(back->bytes != max_size);
                back->node.found_ref = 1;
                back->found_ref += 1;
                if (back->bytes != max_size || back->disk_bytenr != bytenr) {
                        back->bytes = max_size;
                        back->disk_bytenr = bytenr;

                        /*
                         * Need to reinsert if not already in the tree.  The
                         * node is erased first and inserted again below
                         * because the fields just changed are presumably part
                         * of the tree's sort order.
                         */
                        if (!insert) {
                                rb_erase(&back->node.node, &rec->backref_tree);
                                insert = true;
                        }
                }
                rec->refs += 1;
                rec->content_checked = 1;
                rec->owner_ref_checked = 1;
        } else {
                if (back->node.found_extent_tree) {
                        fprintf(stderr, "Extent back ref already exists "
                                "for %llu parent %llu root %llu "
                                "owner %llu offset %llu num_refs %lu\n",
                                (unsigned long long)bytenr,
                                (unsigned long long)parent,
                                (unsigned long long)root,
                                (unsigned long long)owner,
                                (unsigned long long)offset,
                                (unsigned long)num_refs);
                }
                back->num_refs = num_refs;
                back->node.found_extent_tree = 1;
        }
        if (insert)
                WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
                        compare_extent_backref));

        maybe_free_extent_rec(extent_cache, rec);
        return 0;
}
6893
6894 static int add_pending(struct cache_tree *pending,
6895                        struct cache_tree *seen, u64 bytenr, u32 size)
6896 {
6897         int ret;
6898         ret = add_cache_extent(seen, bytenr, size);
6899         if (ret)
6900                 return ret;
6901         add_cache_extent(pending, bytenr, size);
6902         return 0;
6903 }
6904
6905 static int pick_next_pending(struct cache_tree *pending,
6906                         struct cache_tree *reada,
6907                         struct cache_tree *nodes,
6908                         u64 last, struct block_info *bits, int bits_nr,
6909                         int *reada_bits)
6910 {
6911         unsigned long node_start = last;
6912         struct cache_extent *cache;
6913         int ret;
6914
6915         cache = search_cache_extent(reada, 0);
6916         if (cache) {
6917                 bits[0].start = cache->start;
6918                 bits[0].size = cache->size;
6919                 *reada_bits = 1;
6920                 return 1;
6921         }
6922         *reada_bits = 0;
6923         if (node_start > 32768)
6924                 node_start -= 32768;
6925
6926         cache = search_cache_extent(nodes, node_start);
6927         if (!cache)
6928                 cache = search_cache_extent(nodes, 0);
6929
6930         if (!cache) {
6931                  cache = search_cache_extent(pending, 0);
6932                  if (!cache)
6933                          return 0;
6934                  ret = 0;
6935                  do {
6936                          bits[ret].start = cache->start;
6937                          bits[ret].size = cache->size;
6938                          cache = next_cache_extent(cache);
6939                          ret++;
6940                  } while (cache && ret < bits_nr);
6941                  return ret;
6942         }
6943
6944         ret = 0;
6945         do {
6946                 bits[ret].start = cache->start;
6947                 bits[ret].size = cache->size;
6948                 cache = next_cache_extent(cache);
6949                 ret++;
6950         } while (cache && ret < bits_nr);
6951
6952         if (bits_nr - ret > 8) {
6953                 u64 lookup = bits[0].start + bits[0].size;
6954                 struct cache_extent *next;
6955                 next = search_cache_extent(pending, lookup);
6956                 while(next) {
6957                         if (next->start - lookup > 32768)
6958                                 break;
6959                         bits[ret].start = next->start;
6960                         bits[ret].size = next->size;
6961                         lookup = next->start + next->size;
6962                         ret++;
6963                         if (ret == bits_nr)
6964                                 break;
6965                         next = next_cache_extent(next);
6966                         if (!next)
6967                                 break;
6968                 }
6969         }
6970         return ret;
6971 }
6972
6973 static void free_chunk_record(struct cache_extent *cache)
6974 {
6975         struct chunk_record *rec;
6976
6977         rec = container_of(cache, struct chunk_record, cache);
6978         list_del_init(&rec->list);
6979         list_del_init(&rec->dextents);
6980         free(rec);
6981 }
6982
6983 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6984 {
6985         cache_tree_free_extents(chunk_cache, free_chunk_record);
6986 }
6987
6988 static void free_device_record(struct rb_node *node)
6989 {
6990         struct device_record *rec;
6991
6992         rec = container_of(node, struct device_record, node);
6993         free(rec);
6994 }
6995
6996 FREE_RB_BASED_TREE(device_cache, free_device_record);
6997
6998 int insert_block_group_record(struct block_group_tree *tree,
6999                               struct block_group_record *bg_rec)
7000 {
7001         int ret;
7002
7003         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7004         if (ret)
7005                 return ret;
7006
7007         list_add_tail(&bg_rec->list, &tree->block_groups);
7008         return 0;
7009 }
7010
7011 static void free_block_group_record(struct cache_extent *cache)
7012 {
7013         struct block_group_record *rec;
7014
7015         rec = container_of(cache, struct block_group_record, cache);
7016         list_del_init(&rec->list);
7017         free(rec);
7018 }
7019
7020 void free_block_group_tree(struct block_group_tree *tree)
7021 {
7022         cache_tree_free_extents(&tree->tree, free_block_group_record);
7023 }
7024
7025 int insert_device_extent_record(struct device_extent_tree *tree,
7026                                 struct device_extent_record *de_rec)
7027 {
7028         int ret;
7029
7030         /*
7031          * Device extent is a bit different from the other extents, because
7032          * the extents which belong to the different devices may have the
7033          * same start and size, so we need use the special extent cache
7034          * search/insert functions.
7035          */
7036         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7037         if (ret)
7038                 return ret;
7039
7040         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7041         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7042         return 0;
7043 }
7044
7045 static void free_device_extent_record(struct cache_extent *cache)
7046 {
7047         struct device_extent_record *rec;
7048
7049         rec = container_of(cache, struct device_extent_record, cache);
7050         if (!list_empty(&rec->chunk_list))
7051                 list_del_init(&rec->chunk_list);
7052         if (!list_empty(&rec->device_list))
7053                 list_del_init(&rec->device_list);
7054         free(rec);
7055 }
7056
7057 void free_device_extent_tree(struct device_extent_tree *tree)
7058 {
7059         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7060 }
7061
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the backref described by the v0 extent ref item at @slot of @leaf.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded with
 * add_tree_backref(); any other ref is recorded with add_data_backref(),
 * carrying the reference count stored in the item.  The callee's return
 * value is passed back unchanged.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                key.offset, 0, 0, 0,
                                btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
7082
7083 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7084                                             struct btrfs_key *key,
7085                                             int slot)
7086 {
7087         struct btrfs_chunk *ptr;
7088         struct chunk_record *rec;
7089         int num_stripes, i;
7090
7091         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7092         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7093
7094         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7095         if (!rec) {
7096                 fprintf(stderr, "memory allocation failed\n");
7097                 exit(-1);
7098         }
7099
7100         INIT_LIST_HEAD(&rec->list);
7101         INIT_LIST_HEAD(&rec->dextents);
7102         rec->bg_rec = NULL;
7103
7104         rec->cache.start = key->offset;
7105         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7106
7107         rec->generation = btrfs_header_generation(leaf);
7108
7109         rec->objectid = key->objectid;
7110         rec->type = key->type;
7111         rec->offset = key->offset;
7112
7113         rec->length = rec->cache.size;
7114         rec->owner = btrfs_chunk_owner(leaf, ptr);
7115         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7116         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7117         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7118         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7119         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7120         rec->num_stripes = num_stripes;
7121         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7122
7123         for (i = 0; i < rec->num_stripes; ++i) {
7124                 rec->stripes[i].devid =
7125                         btrfs_stripe_devid_nr(leaf, ptr, i);
7126                 rec->stripes[i].offset =
7127                         btrfs_stripe_offset_nr(leaf, ptr, i);
7128                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7129                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7130                                 BTRFS_UUID_SIZE);
7131         }
7132
7133         return rec;
7134 }
7135
7136 static int process_chunk_item(struct cache_tree *chunk_cache,
7137                               struct btrfs_key *key, struct extent_buffer *eb,
7138                               int slot)
7139 {
7140         struct chunk_record *rec;
7141         struct btrfs_chunk *chunk;
7142         int ret = 0;
7143
7144         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7145         /*
7146          * Do extra check for this chunk item,
7147          *
7148          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7149          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7150          * and owner<->key_type check.
7151          */
7152         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7153                                       key->offset);
7154         if (ret < 0) {
7155                 error("chunk(%llu, %llu) is not valid, ignore it",
7156                       key->offset, btrfs_chunk_length(eb, chunk));
7157                 return 0;
7158         }
7159         rec = btrfs_new_chunk_record(eb, key, slot);
7160         ret = insert_cache_extent(chunk_cache, &rec->cache);
7161         if (ret) {
7162                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7163                         rec->offset, rec->length);
7164                 free(rec);
7165         }
7166
7167         return ret;
7168 }
7169
7170 static int process_device_item(struct rb_root *dev_cache,
7171                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7172 {
7173         struct btrfs_dev_item *ptr;
7174         struct device_record *rec;
7175         int ret = 0;
7176
7177         ptr = btrfs_item_ptr(eb,
7178                 slot, struct btrfs_dev_item);
7179
7180         rec = malloc(sizeof(*rec));
7181         if (!rec) {
7182                 fprintf(stderr, "memory allocation failed\n");
7183                 return -ENOMEM;
7184         }
7185
7186         rec->devid = key->offset;
7187         rec->generation = btrfs_header_generation(eb);
7188
7189         rec->objectid = key->objectid;
7190         rec->type = key->type;
7191         rec->offset = key->offset;
7192
7193         rec->devid = btrfs_device_id(eb, ptr);
7194         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7195         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7196
7197         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7198         if (ret) {
7199                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7200                 free(rec);
7201         }
7202
7203         return ret;
7204 }
7205
7206 struct block_group_record *
7207 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7208                              int slot)
7209 {
7210         struct btrfs_block_group_item *ptr;
7211         struct block_group_record *rec;
7212
7213         rec = calloc(1, sizeof(*rec));
7214         if (!rec) {
7215                 fprintf(stderr, "memory allocation failed\n");
7216                 exit(-1);
7217         }
7218
7219         rec->cache.start = key->objectid;
7220         rec->cache.size = key->offset;
7221
7222         rec->generation = btrfs_header_generation(leaf);
7223
7224         rec->objectid = key->objectid;
7225         rec->type = key->type;
7226         rec->offset = key->offset;
7227
7228         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7229         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7230
7231         INIT_LIST_HEAD(&rec->list);
7232
7233         return rec;
7234 }
7235
7236 static int process_block_group_item(struct block_group_tree *block_group_cache,
7237                                     struct btrfs_key *key,
7238                                     struct extent_buffer *eb, int slot)
7239 {
7240         struct block_group_record *rec;
7241         int ret = 0;
7242
7243         rec = btrfs_new_block_group_record(eb, key, slot);
7244         ret = insert_block_group_record(block_group_cache, rec);
7245         if (ret) {
7246                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7247                         rec->objectid, rec->offset);
7248                 free(rec);
7249         }
7250
7251         return ret;
7252 }
7253
7254 struct device_extent_record *
7255 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7256                                struct btrfs_key *key, int slot)
7257 {
7258         struct device_extent_record *rec;
7259         struct btrfs_dev_extent *ptr;
7260
7261         rec = calloc(1, sizeof(*rec));
7262         if (!rec) {
7263                 fprintf(stderr, "memory allocation failed\n");
7264                 exit(-1);
7265         }
7266
7267         rec->cache.objectid = key->objectid;
7268         rec->cache.start = key->offset;
7269
7270         rec->generation = btrfs_header_generation(leaf);
7271
7272         rec->objectid = key->objectid;
7273         rec->type = key->type;
7274         rec->offset = key->offset;
7275
7276         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7277         rec->chunk_objecteid =
7278                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7279         rec->chunk_offset =
7280                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7281         rec->length = btrfs_dev_extent_length(leaf, ptr);
7282         rec->cache.size = rec->length;
7283
7284         INIT_LIST_HEAD(&rec->chunk_list);
7285         INIT_LIST_HEAD(&rec->device_list);
7286
7287         return rec;
7288 }
7289
7290 static int
7291 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7292                            struct btrfs_key *key, struct extent_buffer *eb,
7293                            int slot)
7294 {
7295         struct device_extent_record *rec;
7296         int ret;
7297
7298         rec = btrfs_new_device_extent_record(eb, key, slot);
7299         ret = insert_device_extent_record(dev_extent_cache, rec);
7300         if (ret) {
7301                 fprintf(stderr,
7302                         "Device extent[%llu, %llu, %llu] existed.\n",
7303                         rec->objectid, rec->offset, rec->length);
7304                 free(rec);
7305         }
7306
7307         return ret;
7308 }
7309
7310 static int process_extent_item(struct btrfs_root *root,
7311                                struct cache_tree *extent_cache,
7312                                struct extent_buffer *eb, int slot)
7313 {
7314         struct btrfs_extent_item *ei;
7315         struct btrfs_extent_inline_ref *iref;
7316         struct btrfs_extent_data_ref *dref;
7317         struct btrfs_shared_data_ref *sref;
7318         struct btrfs_key key;
7319         struct extent_record tmpl;
7320         unsigned long end;
7321         unsigned long ptr;
7322         int ret;
7323         int type;
7324         u32 item_size = btrfs_item_size_nr(eb, slot);
7325         u64 refs = 0;
7326         u64 offset;
7327         u64 num_bytes;
7328         int metadata = 0;
7329
7330         btrfs_item_key_to_cpu(eb, &key, slot);
7331
7332         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7333                 metadata = 1;
7334                 num_bytes = root->fs_info->nodesize;
7335         } else {
7336                 num_bytes = key.offset;
7337         }
7338
7339         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7340                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7341                       key.objectid, root->fs_info->sectorsize);
7342                 return -EIO;
7343         }
7344         if (item_size < sizeof(*ei)) {
7345 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7346                 struct btrfs_extent_item_v0 *ei0;
7347                 BUG_ON(item_size != sizeof(*ei0));
7348                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7349                 refs = btrfs_extent_refs_v0(eb, ei0);
7350 #else
7351                 BUG();
7352 #endif
7353                 memset(&tmpl, 0, sizeof(tmpl));
7354                 tmpl.start = key.objectid;
7355                 tmpl.nr = num_bytes;
7356                 tmpl.extent_item_refs = refs;
7357                 tmpl.metadata = metadata;
7358                 tmpl.found_rec = 1;
7359                 tmpl.max_size = num_bytes;
7360
7361                 return add_extent_rec(extent_cache, &tmpl);
7362         }
7363
7364         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7365         refs = btrfs_extent_refs(eb, ei);
7366         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7367                 metadata = 1;
7368         else
7369                 metadata = 0;
7370         if (metadata && num_bytes != root->fs_info->nodesize) {
7371                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7372                       num_bytes, root->fs_info->nodesize);
7373                 return -EIO;
7374         }
7375         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7376                 error("ignore invalid data extent, length %llu is not aligned to %u",
7377                       num_bytes, root->fs_info->sectorsize);
7378                 return -EIO;
7379         }
7380
7381         memset(&tmpl, 0, sizeof(tmpl));
7382         tmpl.start = key.objectid;
7383         tmpl.nr = num_bytes;
7384         tmpl.extent_item_refs = refs;
7385         tmpl.metadata = metadata;
7386         tmpl.found_rec = 1;
7387         tmpl.max_size = num_bytes;
7388         add_extent_rec(extent_cache, &tmpl);
7389
7390         ptr = (unsigned long)(ei + 1);
7391         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7392             key.type == BTRFS_EXTENT_ITEM_KEY)
7393                 ptr += sizeof(struct btrfs_tree_block_info);
7394
7395         end = (unsigned long)ei + item_size;
7396         while (ptr < end) {
7397                 iref = (struct btrfs_extent_inline_ref *)ptr;
7398                 type = btrfs_extent_inline_ref_type(eb, iref);
7399                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7400                 switch (type) {
7401                 case BTRFS_TREE_BLOCK_REF_KEY:
7402                         ret = add_tree_backref(extent_cache, key.objectid,
7403                                         0, offset, 0);
7404                         if (ret < 0)
7405                                 error(
7406                         "add_tree_backref failed (extent items tree block): %s",
7407                                       strerror(-ret));
7408                         break;
7409                 case BTRFS_SHARED_BLOCK_REF_KEY:
7410                         ret = add_tree_backref(extent_cache, key.objectid,
7411                                         offset, 0, 0);
7412                         if (ret < 0)
7413                                 error(
7414                         "add_tree_backref failed (extent items shared block): %s",
7415                                       strerror(-ret));
7416                         break;
7417                 case BTRFS_EXTENT_DATA_REF_KEY:
7418                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7419                         add_data_backref(extent_cache, key.objectid, 0,
7420                                         btrfs_extent_data_ref_root(eb, dref),
7421                                         btrfs_extent_data_ref_objectid(eb,
7422                                                                        dref),
7423                                         btrfs_extent_data_ref_offset(eb, dref),
7424                                         btrfs_extent_data_ref_count(eb, dref),
7425                                         0, num_bytes);
7426                         break;
7427                 case BTRFS_SHARED_DATA_REF_KEY:
7428                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7429                         add_data_backref(extent_cache, key.objectid, offset,
7430                                         0, 0, 0,
7431                                         btrfs_shared_data_ref_count(eb, sref),
7432                                         0, num_bytes);
7433                         break;
7434                 default:
7435                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7436                                 key.objectid, key.type, num_bytes);
7437                         goto out;
7438                 }
7439                 ptr += btrfs_extent_inline_ref_size(type);
7440         }
7441         WARN_ON(ptr > end);
7442 out:
7443         return 0;
7444 }
7445
7446 static int check_cache_range(struct btrfs_root *root,
7447                              struct btrfs_block_group_cache *cache,
7448                              u64 offset, u64 bytes)
7449 {
7450         struct btrfs_free_space *entry;
7451         u64 *logical;
7452         u64 bytenr;
7453         int stripe_len;
7454         int i, nr, ret;
7455
7456         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7457                 bytenr = btrfs_sb_offset(i);
7458                 ret = btrfs_rmap_block(root->fs_info,
7459                                        cache->key.objectid, bytenr, 0,
7460                                        &logical, &nr, &stripe_len);
7461                 if (ret)
7462                         return ret;
7463
7464                 while (nr--) {
7465                         if (logical[nr] + stripe_len <= offset)
7466                                 continue;
7467                         if (offset + bytes <= logical[nr])
7468                                 continue;
7469                         if (logical[nr] == offset) {
7470                                 if (stripe_len >= bytes) {
7471                                         free(logical);
7472                                         return 0;
7473                                 }
7474                                 bytes -= stripe_len;
7475                                 offset += stripe_len;
7476                         } else if (logical[nr] < offset) {
7477                                 if (logical[nr] + stripe_len >=
7478                                     offset + bytes) {
7479                                         free(logical);
7480                                         return 0;
7481                                 }
7482                                 bytes = (offset + bytes) -
7483                                         (logical[nr] + stripe_len);
7484                                 offset = logical[nr] + stripe_len;
7485                         } else {
7486                                 /*
7487                                  * Could be tricky, the super may land in the
7488                                  * middle of the area we're checking.  First
7489                                  * check the easiest case, it's at the end.
7490                                  */
7491                                 if (logical[nr] + stripe_len >=
7492                                     bytes + offset) {
7493                                         bytes = logical[nr] - offset;
7494                                         continue;
7495                                 }
7496
7497                                 /* Check the left side */
7498                                 ret = check_cache_range(root, cache,
7499                                                         offset,
7500                                                         logical[nr] - offset);
7501                                 if (ret) {
7502                                         free(logical);
7503                                         return ret;
7504                                 }
7505
7506                                 /* Now we continue with the right side */
7507                                 bytes = (offset + bytes) -
7508                                         (logical[nr] + stripe_len);
7509                                 offset = logical[nr] + stripe_len;
7510                         }
7511                 }
7512
7513                 free(logical);
7514         }
7515
7516         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7517         if (!entry) {
7518                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7519                         offset, offset+bytes);
7520                 return -EINVAL;
7521         }
7522
7523         if (entry->offset != offset) {
7524                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7525                         entry->offset);
7526                 return -EINVAL;
7527         }
7528
7529         if (entry->bytes != bytes) {
7530                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7531                         bytes, entry->bytes, offset);
7532                 return -EINVAL;
7533         }
7534
7535         unlink_free_space(cache->free_space_ctl, entry);
7536         free(entry);
7537         return 0;
7538 }
7539
7540 static int verify_space_cache(struct btrfs_root *root,
7541                               struct btrfs_block_group_cache *cache)
7542 {
7543         struct btrfs_path path;
7544         struct extent_buffer *leaf;
7545         struct btrfs_key key;
7546         u64 last;
7547         int ret = 0;
7548
7549         root = root->fs_info->extent_root;
7550
7551         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7552
7553         btrfs_init_path(&path);
7554         key.objectid = last;
7555         key.offset = 0;
7556         key.type = BTRFS_EXTENT_ITEM_KEY;
7557         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7558         if (ret < 0)
7559                 goto out;
7560         ret = 0;
7561         while (1) {
7562                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7563                         ret = btrfs_next_leaf(root, &path);
7564                         if (ret < 0)
7565                                 goto out;
7566                         if (ret > 0) {
7567                                 ret = 0;
7568                                 break;
7569                         }
7570                 }
7571                 leaf = path.nodes[0];
7572                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7573                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7574                         break;
7575                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7576                     key.type != BTRFS_METADATA_ITEM_KEY) {
7577                         path.slots[0]++;
7578                         continue;
7579                 }
7580
7581                 if (last == key.objectid) {
7582                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7583                                 last = key.objectid + key.offset;
7584                         else
7585                                 last = key.objectid + root->fs_info->nodesize;
7586                         path.slots[0]++;
7587                         continue;
7588                 }
7589
7590                 ret = check_cache_range(root, cache, last,
7591                                         key.objectid - last);
7592                 if (ret)
7593                         break;
7594                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7595                         last = key.objectid + key.offset;
7596                 else
7597                         last = key.objectid + root->fs_info->nodesize;
7598                 path.slots[0]++;
7599         }
7600
7601         if (last < cache->key.objectid + cache->key.offset)
7602                 ret = check_cache_range(root, cache, last,
7603                                         cache->key.objectid +
7604                                         cache->key.offset - last);
7605
7606 out:
7607         btrfs_release_path(&path);
7608
7609         if (!ret &&
7610             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7611                 fprintf(stderr, "There are still entries left in the space "
7612                         "cache\n");
7613                 ret = -EINVAL;
7614         }
7615
7616         return ret;
7617 }
7618
7619 static int check_space_cache(struct btrfs_root *root)
7620 {
7621         struct btrfs_block_group_cache *cache;
7622         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7623         int ret;
7624         int error = 0;
7625
7626         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7627             btrfs_super_generation(root->fs_info->super_copy) !=
7628             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7629                 printf("cache and super generation don't match, space cache "
7630                        "will be invalidated\n");
7631                 return 0;
7632         }
7633
7634         if (ctx.progress_enabled) {
7635                 ctx.tp = TASK_FREE_SPACE;
7636                 task_start(ctx.info);
7637         }
7638
7639         while (1) {
7640                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7641                 if (!cache)
7642                         break;
7643
7644                 start = cache->key.objectid + cache->key.offset;
7645                 if (!cache->free_space_ctl) {
7646                         if (btrfs_init_free_space_ctl(cache,
7647                                                 root->fs_info->sectorsize)) {
7648                                 ret = -ENOMEM;
7649                                 break;
7650                         }
7651                 } else {
7652                         btrfs_remove_free_space_cache(cache);
7653                 }
7654
7655                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7656                         ret = exclude_super_stripes(root, cache);
7657                         if (ret) {
7658                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7659                                         strerror(-ret));
7660                                 error++;
7661                                 continue;
7662                         }
7663                         ret = load_free_space_tree(root->fs_info, cache);
7664                         free_excluded_extents(root, cache);
7665                         if (ret < 0) {
7666                                 fprintf(stderr, "could not load free space tree: %s\n",
7667                                         strerror(-ret));
7668                                 error++;
7669                                 continue;
7670                         }
7671                         error += ret;
7672                 } else {
7673                         ret = load_free_space_cache(root->fs_info, cache);
7674                         if (!ret)
7675                                 continue;
7676                 }
7677
7678                 ret = verify_space_cache(root, cache);
7679                 if (ret) {
7680                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7681                                 cache->key.objectid);
7682                         error++;
7683                 }
7684         }
7685
7686         task_stop(ctx.info);
7687
7688         return error ? -EINVAL : 0;
7689 }
7690
7691 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7692                         u64 num_bytes, unsigned long leaf_offset,
7693                         struct extent_buffer *eb) {
7694
7695         struct btrfs_fs_info *fs_info = root->fs_info;
7696         u64 offset = 0;
7697         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7698         char *data;
7699         unsigned long csum_offset;
7700         u32 csum;
7701         u32 csum_expected;
7702         u64 read_len;
7703         u64 data_checked = 0;
7704         u64 tmp;
7705         int ret = 0;
7706         int mirror;
7707         int num_copies;
7708
7709         if (num_bytes % fs_info->sectorsize)
7710                 return -EINVAL;
7711
7712         data = malloc(num_bytes);
7713         if (!data)
7714                 return -ENOMEM;
7715
7716         while (offset < num_bytes) {
7717                 mirror = 0;
7718 again:
7719                 read_len = num_bytes - offset;
7720                 /* read as much space once a time */
7721                 ret = read_extent_data(fs_info, data + offset,
7722                                 bytenr + offset, &read_len, mirror);
7723                 if (ret)
7724                         goto out;
7725                 data_checked = 0;
7726                 /* verify every 4k data's checksum */
7727                 while (data_checked < read_len) {
7728                         csum = ~(u32)0;
7729                         tmp = offset + data_checked;
7730
7731                         csum = btrfs_csum_data((char *)data + tmp,
7732                                                csum, fs_info->sectorsize);
7733                         btrfs_csum_final(csum, (u8 *)&csum);
7734
7735                         csum_offset = leaf_offset +
7736                                  tmp / fs_info->sectorsize * csum_size;
7737                         read_extent_buffer(eb, (char *)&csum_expected,
7738                                            csum_offset, csum_size);
7739                         /* try another mirror */
7740                         if (csum != csum_expected) {
7741                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7742                                                 mirror, bytenr + tmp,
7743                                                 csum, csum_expected);
7744                                 num_copies = btrfs_num_copies(root->fs_info,
7745                                                 bytenr, num_bytes);
7746                                 if (mirror < num_copies - 1) {
7747                                         mirror += 1;
7748                                         goto again;
7749                                 }
7750                         }
7751                         data_checked += fs_info->sectorsize;
7752                 }
7753                 offset += read_len;
7754         }
7755 out:
7756         free(data);
7757         return ret;
7758 }
7759
7760 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7761                                u64 num_bytes)
7762 {
7763         struct btrfs_path path;
7764         struct extent_buffer *leaf;
7765         struct btrfs_key key;
7766         int ret;
7767
7768         btrfs_init_path(&path);
7769         key.objectid = bytenr;
7770         key.type = BTRFS_EXTENT_ITEM_KEY;
7771         key.offset = (u64)-1;
7772
7773 again:
7774         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7775                                 0, 0);
7776         if (ret < 0) {
7777                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7778                 btrfs_release_path(&path);
7779                 return ret;
7780         } else if (ret) {
7781                 if (path.slots[0] > 0) {
7782                         path.slots[0]--;
7783                 } else {
7784                         ret = btrfs_prev_leaf(root, &path);
7785                         if (ret < 0) {
7786                                 goto out;
7787                         } else if (ret > 0) {
7788                                 ret = 0;
7789                                 goto out;
7790                         }
7791                 }
7792         }
7793
7794         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7795
7796         /*
7797          * Block group items come before extent items if they have the same
7798          * bytenr, so walk back one more just in case.  Dear future traveller,
7799          * first congrats on mastering time travel.  Now if it's not too much
7800          * trouble could you go back to 2006 and tell Chris to make the
7801          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7802          * EXTENT_ITEM_KEY please?
7803          */
7804         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7805                 if (path.slots[0] > 0) {
7806                         path.slots[0]--;
7807                 } else {
7808                         ret = btrfs_prev_leaf(root, &path);
7809                         if (ret < 0) {
7810                                 goto out;
7811                         } else if (ret > 0) {
7812                                 ret = 0;
7813                                 goto out;
7814                         }
7815                 }
7816                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7817         }
7818
7819         while (num_bytes) {
7820                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7821                         ret = btrfs_next_leaf(root, &path);
7822                         if (ret < 0) {
7823                                 fprintf(stderr, "Error going to next leaf "
7824                                         "%d\n", ret);
7825                                 btrfs_release_path(&path);
7826                                 return ret;
7827                         } else if (ret) {
7828                                 break;
7829                         }
7830                 }
7831                 leaf = path.nodes[0];
7832                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7833                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7834                         path.slots[0]++;
7835                         continue;
7836                 }
7837                 if (key.objectid + key.offset < bytenr) {
7838                         path.slots[0]++;
7839                         continue;
7840                 }
7841                 if (key.objectid > bytenr + num_bytes)
7842                         break;
7843
7844                 if (key.objectid == bytenr) {
7845                         if (key.offset >= num_bytes) {
7846                                 num_bytes = 0;
7847                                 break;
7848                         }
7849                         num_bytes -= key.offset;
7850                         bytenr += key.offset;
7851                 } else if (key.objectid < bytenr) {
7852                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7853                                 num_bytes = 0;
7854                                 break;
7855                         }
7856                         num_bytes = (bytenr + num_bytes) -
7857                                 (key.objectid + key.offset);
7858                         bytenr = key.objectid + key.offset;
7859                 } else {
7860                         if (key.objectid + key.offset < bytenr + num_bytes) {
7861                                 u64 new_start = key.objectid + key.offset;
7862                                 u64 new_bytes = bytenr + num_bytes - new_start;
7863
7864                                 /*
7865                                  * Weird case, the extent is in the middle of
7866                                  * our range, we'll have to search one side
7867                                  * and then the other.  Not sure if this happens
7868                                  * in real life, but no harm in coding it up
7869                                  * anyway just in case.
7870                                  */
7871                                 btrfs_release_path(&path);
7872                                 ret = check_extent_exists(root, new_start,
7873                                                           new_bytes);
7874                                 if (ret) {
7875                                         fprintf(stderr, "Right section didn't "
7876                                                 "have a record\n");
7877                                         break;
7878                                 }
7879                                 num_bytes = key.objectid - bytenr;
7880                                 goto again;
7881                         }
7882                         num_bytes = key.objectid - bytenr;
7883                 }
7884                 path.slots[0]++;
7885         }
7886         ret = 0;
7887
7888 out:
7889         if (num_bytes && !ret) {
7890                 fprintf(stderr, "There are no extents for csum range "
7891                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7892                 ret = 1;
7893         }
7894
7895         btrfs_release_path(&path);
7896         return ret;
7897 }
7898
7899 static int check_csums(struct btrfs_root *root)
7900 {
7901         struct btrfs_path path;
7902         struct extent_buffer *leaf;
7903         struct btrfs_key key;
7904         u64 offset = 0, num_bytes = 0;
7905         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7906         int errors = 0;
7907         int ret;
7908         u64 data_len;
7909         unsigned long leaf_offset;
7910
7911         root = root->fs_info->csum_root;
7912         if (!extent_buffer_uptodate(root->node)) {
7913                 fprintf(stderr, "No valid csum tree found\n");
7914                 return -ENOENT;
7915         }
7916
7917         btrfs_init_path(&path);
7918         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7919         key.type = BTRFS_EXTENT_CSUM_KEY;
7920         key.offset = 0;
7921         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7922         if (ret < 0) {
7923                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7924                 btrfs_release_path(&path);
7925                 return ret;
7926         }
7927
7928         if (ret > 0 && path.slots[0])
7929                 path.slots[0]--;
7930         ret = 0;
7931
7932         while (1) {
7933                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7934                         ret = btrfs_next_leaf(root, &path);
7935                         if (ret < 0) {
7936                                 fprintf(stderr, "Error going to next leaf "
7937                                         "%d\n", ret);
7938                                 break;
7939                         }
7940                         if (ret)
7941                                 break;
7942                 }
7943                 leaf = path.nodes[0];
7944
7945                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7946                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7947                         path.slots[0]++;
7948                         continue;
7949                 }
7950
7951                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7952                               csum_size) * root->fs_info->sectorsize;
7953                 if (!check_data_csum)
7954                         goto skip_csum_check;
7955                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7956                 ret = check_extent_csums(root, key.offset, data_len,
7957                                          leaf_offset, leaf);
7958                 if (ret)
7959                         break;
7960 skip_csum_check:
7961                 if (!num_bytes) {
7962                         offset = key.offset;
7963                 } else if (key.offset != offset + num_bytes) {
7964                         ret = check_extent_exists(root, offset, num_bytes);
7965                         if (ret) {
7966                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7967                                         "there is no extent record\n",
7968                                         offset, offset+num_bytes);
7969                                 errors++;
7970                         }
7971                         offset = key.offset;
7972                         num_bytes = 0;
7973                 }
7974                 num_bytes += data_len;
7975                 path.slots[0]++;
7976         }
7977
7978         btrfs_release_path(&path);
7979         return errors;
7980 }
7981
7982 static int is_dropped_key(struct btrfs_key *key,
7983                           struct btrfs_key *drop_key) {
7984         if (key->objectid < drop_key->objectid)
7985                 return 1;
7986         else if (key->objectid == drop_key->objectid) {
7987                 if (key->type < drop_key->type)
7988                         return 1;
7989                 else if (key->type == drop_key->type) {
7990                         if (key->offset < drop_key->offset)
7991                                 return 1;
7992                 }
7993         }
7994         return 0;
7995 }
7996
7997 /*
7998  * Here are the rules for FULL_BACKREF.
7999  *
8000  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8001  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8002  *      FULL_BACKREF set.
8003  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8004  *    if it happened after the relocation occurred since we'll have dropped the
8005  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8006  *    have no real way to know for sure.
8007  *
8008  * We process the blocks one root at a time, and we start from the lowest root
8009  * objectid and go to the highest.  So we can just lookup the owner backref for
8010  * the record and if we don't find it then we know it doesn't exist and we have
8011  * a FULL BACKREF.
8012  *
8013  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8014  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8015  * be set or not and then we can check later once we've gathered all the refs.
8016  */
8017 static int calc_extent_flag(struct cache_tree *extent_cache,
8018                            struct extent_buffer *buf,
8019                            struct root_item_record *ri,
8020                            u64 *flags)
8021 {
8022         struct extent_record *rec;
8023         struct cache_extent *cache;
8024         struct tree_backref *tback;
8025         u64 owner = 0;
8026
8027         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8028         /* we have added this extent before */
8029         if (!cache)
8030                 return -ENOENT;
8031
8032         rec = container_of(cache, struct extent_record, cache);
8033
8034         /*
8035          * Except file/reloc tree, we can not have
8036          * FULL BACKREF MODE
8037          */
8038         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8039                 goto normal;
8040         /*
8041          * root node
8042          */
8043         if (buf->start == ri->bytenr)
8044                 goto normal;
8045
8046         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8047                 goto full_backref;
8048
8049         owner = btrfs_header_owner(buf);
8050         if (owner == ri->objectid)
8051                 goto normal;
8052
8053         tback = find_tree_backref(rec, 0, owner);
8054         if (!tback)
8055                 goto full_backref;
8056 normal:
8057         *flags = 0;
8058         if (rec->flag_block_full_backref != FLAG_UNSET &&
8059             rec->flag_block_full_backref != 0)
8060                 rec->bad_full_backref = 1;
8061         return 0;
8062 full_backref:
8063         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8064         if (rec->flag_block_full_backref != FLAG_UNSET &&
8065             rec->flag_block_full_backref != 1)
8066                 rec->bad_full_backref = 1;
8067         return 0;
8068 }
8069
8070 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8071 {
8072         fprintf(stderr, "Invalid key type(");
8073         print_key_type(stderr, 0, key_type);
8074         fprintf(stderr, ") found in root(");
8075         print_objectid(stderr, rootid, 0);
8076         fprintf(stderr, ")\n");
8077 }
8078
8079 /*
8080  * Check if the key is valid with its extent buffer.
8081  *
8082  * This is a early check in case invalid key exists in a extent buffer
8083  * This is not comprehensive yet, but should prevent wrong key/item passed
8084  * further
8085  */
8086 static int check_type_with_root(u64 rootid, u8 key_type)
8087 {
8088         switch (key_type) {
8089         /* Only valid in chunk tree */
8090         case BTRFS_DEV_ITEM_KEY:
8091         case BTRFS_CHUNK_ITEM_KEY:
8092                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8093                         goto err;
8094                 break;
8095         /* valid in csum and log tree */
8096         case BTRFS_CSUM_TREE_OBJECTID:
8097                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8098                       is_fstree(rootid)))
8099                         goto err;
8100                 break;
8101         case BTRFS_EXTENT_ITEM_KEY:
8102         case BTRFS_METADATA_ITEM_KEY:
8103         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8104                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8105                         goto err;
8106                 break;
8107         case BTRFS_ROOT_ITEM_KEY:
8108                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8109                         goto err;
8110                 break;
8111         case BTRFS_DEV_EXTENT_KEY:
8112                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8113                         goto err;
8114                 break;
8115         }
8116         return 0;
8117 err:
8118         report_mismatch_key_root(key_type, rootid);
8119         return -EINVAL;
8120 }
8121
8122 static int run_next_block(struct btrfs_root *root,
8123                           struct block_info *bits,
8124                           int bits_nr,
8125                           u64 *last,
8126                           struct cache_tree *pending,
8127                           struct cache_tree *seen,
8128                           struct cache_tree *reada,
8129                           struct cache_tree *nodes,
8130                           struct cache_tree *extent_cache,
8131                           struct cache_tree *chunk_cache,
8132                           struct rb_root *dev_cache,
8133                           struct block_group_tree *block_group_cache,
8134                           struct device_extent_tree *dev_extent_cache,
8135                           struct root_item_record *ri)
8136 {
8137         struct btrfs_fs_info *fs_info = root->fs_info;
8138         struct extent_buffer *buf;
8139         struct extent_record *rec = NULL;
8140         u64 bytenr;
8141         u32 size;
8142         u64 parent;
8143         u64 owner;
8144         u64 flags;
8145         u64 ptr;
8146         u64 gen = 0;
8147         int ret = 0;
8148         int i;
8149         int nritems;
8150         struct btrfs_key key;
8151         struct cache_extent *cache;
8152         int reada_bits;
8153
8154         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8155                                     bits_nr, &reada_bits);
8156         if (nritems == 0)
8157                 return 1;
8158
8159         if (!reada_bits) {
8160                 for(i = 0; i < nritems; i++) {
8161                         ret = add_cache_extent(reada, bits[i].start,
8162                                                bits[i].size);
8163                         if (ret == -EEXIST)
8164                                 continue;
8165
8166                         /* fixme, get the parent transid */
8167                         readahead_tree_block(fs_info, bits[i].start, 0);
8168                 }
8169         }
8170         *last = bits[0].start;
8171         bytenr = bits[0].start;
8172         size = bits[0].size;
8173
8174         cache = lookup_cache_extent(pending, bytenr, size);
8175         if (cache) {
8176                 remove_cache_extent(pending, cache);
8177                 free(cache);
8178         }
8179         cache = lookup_cache_extent(reada, bytenr, size);
8180         if (cache) {
8181                 remove_cache_extent(reada, cache);
8182                 free(cache);
8183         }
8184         cache = lookup_cache_extent(nodes, bytenr, size);
8185         if (cache) {
8186                 remove_cache_extent(nodes, cache);
8187                 free(cache);
8188         }
8189         cache = lookup_cache_extent(extent_cache, bytenr, size);
8190         if (cache) {
8191                 rec = container_of(cache, struct extent_record, cache);
8192                 gen = rec->parent_generation;
8193         }
8194
8195         /* fixme, get the real parent transid */
8196         buf = read_tree_block(root->fs_info, bytenr, gen);
8197         if (!extent_buffer_uptodate(buf)) {
8198                 record_bad_block_io(root->fs_info,
8199                                     extent_cache, bytenr, size);
8200                 goto out;
8201         }
8202
8203         nritems = btrfs_header_nritems(buf);
8204
8205         flags = 0;
8206         if (!init_extent_tree) {
8207                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8208                                        btrfs_header_level(buf), 1, NULL,
8209                                        &flags);
8210                 if (ret < 0) {
8211                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8212                         if (ret < 0) {
8213                                 fprintf(stderr, "Couldn't calc extent flags\n");
8214                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8215                         }
8216                 }
8217         } else {
8218                 flags = 0;
8219                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8220                 if (ret < 0) {
8221                         fprintf(stderr, "Couldn't calc extent flags\n");
8222                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8223                 }
8224         }
8225
8226         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8227                 if (ri != NULL &&
8228                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8229                     ri->objectid == btrfs_header_owner(buf)) {
8230                         /*
8231                          * Ok we got to this block from it's original owner and
8232                          * we have FULL_BACKREF set.  Relocation can leave
8233                          * converted blocks over so this is altogether possible,
8234                          * however it's not possible if the generation > the
8235                          * last snapshot, so check for this case.
8236                          */
8237                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8238                             btrfs_header_generation(buf) > ri->last_snapshot) {
8239                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8240                                 rec->bad_full_backref = 1;
8241                         }
8242                 }
8243         } else {
8244                 if (ri != NULL &&
8245                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8246                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8247                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8248                         rec->bad_full_backref = 1;
8249                 }
8250         }
8251
8252         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8253                 rec->flag_block_full_backref = 1;
8254                 parent = bytenr;
8255                 owner = 0;
8256         } else {
8257                 rec->flag_block_full_backref = 0;
8258                 parent = 0;
8259                 owner = btrfs_header_owner(buf);
8260         }
8261
8262         ret = check_block(root, extent_cache, buf, flags);
8263         if (ret)
8264                 goto out;
8265
8266         if (btrfs_is_leaf(buf)) {
8267                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8268                 for (i = 0; i < nritems; i++) {
8269                         struct btrfs_file_extent_item *fi;
8270                         btrfs_item_key_to_cpu(buf, &key, i);
8271                         /*
8272                          * Check key type against the leaf owner.
8273                          * Could filter quite a lot of early error if
8274                          * owner is correct
8275                          */
8276                         if (check_type_with_root(btrfs_header_owner(buf),
8277                                                  key.type)) {
8278                                 fprintf(stderr, "ignoring invalid key\n");
8279                                 continue;
8280                         }
8281                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8282                                 process_extent_item(root, extent_cache, buf,
8283                                                     i);
8284                                 continue;
8285                         }
8286                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8287                                 process_extent_item(root, extent_cache, buf,
8288                                                     i);
8289                                 continue;
8290                         }
8291                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8292                                 total_csum_bytes +=
8293                                         btrfs_item_size_nr(buf, i);
8294                                 continue;
8295                         }
8296                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8297                                 process_chunk_item(chunk_cache, &key, buf, i);
8298                                 continue;
8299                         }
8300                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8301                                 process_device_item(dev_cache, &key, buf, i);
8302                                 continue;
8303                         }
8304                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8305                                 process_block_group_item(block_group_cache,
8306                                         &key, buf, i);
8307                                 continue;
8308                         }
8309                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8310                                 process_device_extent_item(dev_extent_cache,
8311                                         &key, buf, i);
8312                                 continue;
8313
8314                         }
8315                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8316 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8317                                 process_extent_ref_v0(extent_cache, buf, i);
8318 #else
8319                                 BUG();
8320 #endif
8321                                 continue;
8322                         }
8323
8324                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8325                                 ret = add_tree_backref(extent_cache,
8326                                                 key.objectid, 0, key.offset, 0);
8327                                 if (ret < 0)
8328                                         error(
8329                                 "add_tree_backref failed (leaf tree block): %s",
8330                                               strerror(-ret));
8331                                 continue;
8332                         }
8333                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8334                                 ret = add_tree_backref(extent_cache,
8335                                                 key.objectid, key.offset, 0, 0);
8336                                 if (ret < 0)
8337                                         error(
8338                                 "add_tree_backref failed (leaf shared block): %s",
8339                                               strerror(-ret));
8340                                 continue;
8341                         }
8342                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8343                                 struct btrfs_extent_data_ref *ref;
8344                                 ref = btrfs_item_ptr(buf, i,
8345                                                 struct btrfs_extent_data_ref);
8346                                 add_data_backref(extent_cache,
8347                                         key.objectid, 0,
8348                                         btrfs_extent_data_ref_root(buf, ref),
8349                                         btrfs_extent_data_ref_objectid(buf,
8350                                                                        ref),
8351                                         btrfs_extent_data_ref_offset(buf, ref),
8352                                         btrfs_extent_data_ref_count(buf, ref),
8353                                         0, root->fs_info->sectorsize);
8354                                 continue;
8355                         }
8356                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8357                                 struct btrfs_shared_data_ref *ref;
8358                                 ref = btrfs_item_ptr(buf, i,
8359                                                 struct btrfs_shared_data_ref);
8360                                 add_data_backref(extent_cache,
8361                                         key.objectid, key.offset, 0, 0, 0,
8362                                         btrfs_shared_data_ref_count(buf, ref),
8363                                         0, root->fs_info->sectorsize);
8364                                 continue;
8365                         }
8366                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8367                                 struct bad_item *bad;
8368
8369                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8370                                         continue;
8371                                 if (!owner)
8372                                         continue;
8373                                 bad = malloc(sizeof(struct bad_item));
8374                                 if (!bad)
8375                                         continue;
8376                                 INIT_LIST_HEAD(&bad->list);
8377                                 memcpy(&bad->key, &key,
8378                                        sizeof(struct btrfs_key));
8379                                 bad->root_id = owner;
8380                                 list_add_tail(&bad->list, &delete_items);
8381                                 continue;
8382                         }
8383                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8384                                 continue;
8385                         fi = btrfs_item_ptr(buf, i,
8386                                             struct btrfs_file_extent_item);
8387                         if (btrfs_file_extent_type(buf, fi) ==
8388                             BTRFS_FILE_EXTENT_INLINE)
8389                                 continue;
8390                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8391                                 continue;
8392
8393                         data_bytes_allocated +=
8394                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8395                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8396                                 abort();
8397                         }
8398                         data_bytes_referenced +=
8399                                 btrfs_file_extent_num_bytes(buf, fi);
8400                         add_data_backref(extent_cache,
8401                                 btrfs_file_extent_disk_bytenr(buf, fi),
8402                                 parent, owner, key.objectid, key.offset -
8403                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8404                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8405                 }
8406         } else {
8407                 int level;
8408                 struct btrfs_key first_key;
8409
8410                 first_key.objectid = 0;
8411
8412                 if (nritems > 0)
8413                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8414                 level = btrfs_header_level(buf);
8415                 for (i = 0; i < nritems; i++) {
8416                         struct extent_record tmpl;
8417
8418                         ptr = btrfs_node_blockptr(buf, i);
8419                         size = root->fs_info->nodesize;
8420                         btrfs_node_key_to_cpu(buf, &key, i);
8421                         if (ri != NULL) {
8422                                 if ((level == ri->drop_level)
8423                                     && is_dropped_key(&key, &ri->drop_key)) {
8424                                         continue;
8425                                 }
8426                         }
8427
8428                         memset(&tmpl, 0, sizeof(tmpl));
8429                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8430                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8431                         tmpl.start = ptr;
8432                         tmpl.nr = size;
8433                         tmpl.refs = 1;
8434                         tmpl.metadata = 1;
8435                         tmpl.max_size = size;
8436                         ret = add_extent_rec(extent_cache, &tmpl);
8437                         if (ret < 0)
8438                                 goto out;
8439
8440                         ret = add_tree_backref(extent_cache, ptr, parent,
8441                                         owner, 1);
8442                         if (ret < 0) {
8443                                 error(
8444                                 "add_tree_backref failed (non-leaf block): %s",
8445                                       strerror(-ret));
8446                                 continue;
8447                         }
8448
8449                         if (level > 1) {
8450                                 add_pending(nodes, seen, ptr, size);
8451                         } else {
8452                                 add_pending(pending, seen, ptr, size);
8453                         }
8454                 }
8455                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8456                                       nritems) * sizeof(struct btrfs_key_ptr);
8457         }
8458         total_btree_bytes += buf->len;
8459         if (fs_root_objectid(btrfs_header_owner(buf)))
8460                 total_fs_tree_bytes += buf->len;
8461         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8462                 total_extent_tree_bytes += buf->len;
8463 out:
8464         free_extent_buffer(buf);
8465         return ret;
8466 }
8467
8468 static int add_root_to_pending(struct extent_buffer *buf,
8469                                struct cache_tree *extent_cache,
8470                                struct cache_tree *pending,
8471                                struct cache_tree *seen,
8472                                struct cache_tree *nodes,
8473                                u64 objectid)
8474 {
8475         struct extent_record tmpl;
8476         int ret;
8477
8478         if (btrfs_header_level(buf) > 0)
8479                 add_pending(nodes, seen, buf->start, buf->len);
8480         else
8481                 add_pending(pending, seen, buf->start, buf->len);
8482
8483         memset(&tmpl, 0, sizeof(tmpl));
8484         tmpl.start = buf->start;
8485         tmpl.nr = buf->len;
8486         tmpl.is_root = 1;
8487         tmpl.refs = 1;
8488         tmpl.metadata = 1;
8489         tmpl.max_size = buf->len;
8490         add_extent_rec(extent_cache, &tmpl);
8491
8492         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8493             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8494                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8495                                 0, 1);
8496         else
8497                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8498                                 1);
8499         return ret;
8500 }
8501
8502 /* as we fix the tree, we might be deleting blocks that
8503  * we're tracking for repair.  This hook makes sure we
8504  * remove any backrefs for blocks as we are fixing them.
8505  */
8506 static int free_extent_hook(struct btrfs_trans_handle *trans,
8507                             struct btrfs_root *root,
8508                             u64 bytenr, u64 num_bytes, u64 parent,
8509                             u64 root_objectid, u64 owner, u64 offset,
8510                             int refs_to_drop)
8511 {
8512         struct extent_record *rec;
8513         struct cache_extent *cache;
8514         int is_data;
8515         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8516
8517         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8518         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8519         if (!cache)
8520                 return 0;
8521
8522         rec = container_of(cache, struct extent_record, cache);
8523         if (is_data) {
8524                 struct data_backref *back;
8525                 back = find_data_backref(rec, parent, root_objectid, owner,
8526                                          offset, 1, bytenr, num_bytes);
8527                 if (!back)
8528                         goto out;
8529                 if (back->node.found_ref) {
8530                         back->found_ref -= refs_to_drop;
8531                         if (rec->refs)
8532                                 rec->refs -= refs_to_drop;
8533                 }
8534                 if (back->node.found_extent_tree) {
8535                         back->num_refs -= refs_to_drop;
8536                         if (rec->extent_item_refs)
8537                                 rec->extent_item_refs -= refs_to_drop;
8538                 }
8539                 if (back->found_ref == 0)
8540                         back->node.found_ref = 0;
8541                 if (back->num_refs == 0)
8542                         back->node.found_extent_tree = 0;
8543
8544                 if (!back->node.found_extent_tree && back->node.found_ref) {
8545                         rb_erase(&back->node.node, &rec->backref_tree);
8546                         free(back);
8547                 }
8548         } else {
8549                 struct tree_backref *back;
8550                 back = find_tree_backref(rec, parent, root_objectid);
8551                 if (!back)
8552                         goto out;
8553                 if (back->node.found_ref) {
8554                         if (rec->refs)
8555                                 rec->refs--;
8556                         back->node.found_ref = 0;
8557                 }
8558                 if (back->node.found_extent_tree) {
8559                         if (rec->extent_item_refs)
8560                                 rec->extent_item_refs--;
8561                         back->node.found_extent_tree = 0;
8562                 }
8563                 if (!back->node.found_extent_tree && back->node.found_ref) {
8564                         rb_erase(&back->node.node, &rec->backref_tree);
8565                         free(back);
8566                 }
8567         }
8568         maybe_free_extent_rec(extent_cache, rec);
8569 out:
8570         return 0;
8571 }
8572
8573 static int delete_extent_records(struct btrfs_trans_handle *trans,
8574                                  struct btrfs_root *root,
8575                                  struct btrfs_path *path,
8576                                  u64 bytenr)
8577 {
8578         struct btrfs_key key;
8579         struct btrfs_key found_key;
8580         struct extent_buffer *leaf;
8581         int ret;
8582         int slot;
8583
8584
8585         key.objectid = bytenr;
8586         key.type = (u8)-1;
8587         key.offset = (u64)-1;
8588
8589         while(1) {
8590                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8591                                         &key, path, 0, 1);
8592                 if (ret < 0)
8593                         break;
8594
8595                 if (ret > 0) {
8596                         ret = 0;
8597                         if (path->slots[0] == 0)
8598                                 break;
8599                         path->slots[0]--;
8600                 }
8601                 ret = 0;
8602
8603                 leaf = path->nodes[0];
8604                 slot = path->slots[0];
8605
8606                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8607                 if (found_key.objectid != bytenr)
8608                         break;
8609
8610                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8611                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8612                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8613                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8614                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8615                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8616                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8617                         btrfs_release_path(path);
8618                         if (found_key.type == 0) {
8619                                 if (found_key.offset == 0)
8620                                         break;
8621                                 key.offset = found_key.offset - 1;
8622                                 key.type = found_key.type;
8623                         }
8624                         key.type = found_key.type - 1;
8625                         key.offset = (u64)-1;
8626                         continue;
8627                 }
8628
8629                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8630                         found_key.objectid, found_key.type, found_key.offset);
8631
8632                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8633                 if (ret)
8634                         break;
8635                 btrfs_release_path(path);
8636
8637                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8638                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8639                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8640                                 found_key.offset : root->fs_info->nodesize;
8641
8642                         ret = btrfs_update_block_group(trans, root, bytenr,
8643                                                        bytes, 0, 0);
8644                         if (ret)
8645                                 break;
8646                 }
8647         }
8648
8649         btrfs_release_path(path);
8650         return ret;
8651 }
8652
8653 /*
8654  * for a single backref, this will allocate a new extent
8655  * and add the backref to it.
8656  */
8657 static int record_extent(struct btrfs_trans_handle *trans,
8658                          struct btrfs_fs_info *info,
8659                          struct btrfs_path *path,
8660                          struct extent_record *rec,
8661                          struct extent_backref *back,
8662                          int allocated, u64 flags)
8663 {
8664         int ret = 0;
8665         struct btrfs_root *extent_root = info->extent_root;
8666         struct extent_buffer *leaf;
8667         struct btrfs_key ins_key;
8668         struct btrfs_extent_item *ei;
8669         struct data_backref *dback;
8670         struct btrfs_tree_block_info *bi;
8671
8672         if (!back->is_data)
8673                 rec->max_size = max_t(u64, rec->max_size,
8674                                     info->nodesize);
8675
8676         if (!allocated) {
8677                 u32 item_size = sizeof(*ei);
8678
8679                 if (!back->is_data)
8680                         item_size += sizeof(*bi);
8681
8682                 ins_key.objectid = rec->start;
8683                 ins_key.offset = rec->max_size;
8684                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8685
8686                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8687                                         &ins_key, item_size);
8688                 if (ret)
8689                         goto fail;
8690
8691                 leaf = path->nodes[0];
8692                 ei = btrfs_item_ptr(leaf, path->slots[0],
8693                                     struct btrfs_extent_item);
8694
8695                 btrfs_set_extent_refs(leaf, ei, 0);
8696                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8697
8698                 if (back->is_data) {
8699                         btrfs_set_extent_flags(leaf, ei,
8700                                                BTRFS_EXTENT_FLAG_DATA);
8701                 } else {
8702                         struct btrfs_disk_key copy_key;;
8703
8704                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8705                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8706                                              sizeof(*bi));
8707
8708                         btrfs_set_disk_key_objectid(&copy_key,
8709                                                     rec->info_objectid);
8710                         btrfs_set_disk_key_type(&copy_key, 0);
8711                         btrfs_set_disk_key_offset(&copy_key, 0);
8712
8713                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8714                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8715
8716                         btrfs_set_extent_flags(leaf, ei,
8717                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8718                 }
8719
8720                 btrfs_mark_buffer_dirty(leaf);
8721                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8722                                                rec->max_size, 1, 0);
8723                 if (ret)
8724                         goto fail;
8725                 btrfs_release_path(path);
8726         }
8727
8728         if (back->is_data) {
8729                 u64 parent;
8730                 int i;
8731
8732                 dback = to_data_backref(back);
8733                 if (back->full_backref)
8734                         parent = dback->parent;
8735                 else
8736                         parent = 0;
8737
8738                 for (i = 0; i < dback->found_ref; i++) {
8739                         /* if parent != 0, we're doing a full backref
8740                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8741                          * just makes the backref allocator create a data
8742                          * backref
8743                          */
8744                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8745                                                    rec->start, rec->max_size,
8746                                                    parent,
8747                                                    dback->root,
8748                                                    parent ?
8749                                                    BTRFS_FIRST_FREE_OBJECTID :
8750                                                    dback->owner,
8751                                                    dback->offset);
8752                         if (ret)
8753                                 break;
8754                 }
8755                 fprintf(stderr, "adding new data backref"
8756                                 " on %llu %s %llu owner %llu"
8757                                 " offset %llu found %d\n",
8758                                 (unsigned long long)rec->start,
8759                                 back->full_backref ?
8760                                 "parent" : "root",
8761                                 back->full_backref ?
8762                                 (unsigned long long)parent :
8763                                 (unsigned long long)dback->root,
8764                                 (unsigned long long)dback->owner,
8765                                 (unsigned long long)dback->offset,
8766                                 dback->found_ref);
8767         } else {
8768                 u64 parent;
8769                 struct tree_backref *tback;
8770
8771                 tback = to_tree_backref(back);
8772                 if (back->full_backref)
8773                         parent = tback->parent;
8774                 else
8775                         parent = 0;
8776
8777                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8778                                            rec->start, rec->max_size,
8779                                            parent, tback->root, 0, 0);
8780                 fprintf(stderr, "adding new tree backref on "
8781                         "start %llu len %llu parent %llu root %llu\n",
8782                         rec->start, rec->max_size, parent, tback->root);
8783         }
8784 fail:
8785         btrfs_release_path(path);
8786         return ret;
8787 }
8788
8789 static struct extent_entry *find_entry(struct list_head *entries,
8790                                        u64 bytenr, u64 bytes)
8791 {
8792         struct extent_entry *entry = NULL;
8793
8794         list_for_each_entry(entry, entries, list) {
8795                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8796                         return entry;
8797         }
8798
8799         return NULL;
8800 }
8801
8802 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8803 {
8804         struct extent_entry *entry, *best = NULL, *prev = NULL;
8805
8806         list_for_each_entry(entry, entries, list) {
8807                 /*
8808                  * If there are as many broken entries as entries then we know
8809                  * not to trust this particular entry.
8810                  */
8811                 if (entry->broken == entry->count)
8812                         continue;
8813
8814                 /*
8815                  * Special case, when there are only two entries and 'best' is
8816                  * the first one
8817                  */
8818                 if (!prev) {
8819                         best = entry;
8820                         prev = entry;
8821                         continue;
8822                 }
8823
8824                 /*
8825                  * If our current entry == best then we can't be sure our best
8826                  * is really the best, so we need to keep searching.
8827                  */
8828                 if (best && best->count == entry->count) {
8829                         prev = entry;
8830                         best = NULL;
8831                         continue;
8832                 }
8833
8834                 /* Prev == entry, not good enough, have to keep searching */
8835                 if (!prev->broken && prev->count == entry->count)
8836                         continue;
8837
8838                 if (!best)
8839                         best = (prev->count > entry->count) ? prev : entry;
8840                 else if (best->count < entry->count)
8841                         best = entry;
8842                 prev = entry;
8843         }
8844
8845         return best;
8846 }
8847
8848 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8849                       struct data_backref *dback, struct extent_entry *entry)
8850 {
8851         struct btrfs_trans_handle *trans;
8852         struct btrfs_root *root;
8853         struct btrfs_file_extent_item *fi;
8854         struct extent_buffer *leaf;
8855         struct btrfs_key key;
8856         u64 bytenr, bytes;
8857         int ret, err;
8858
8859         key.objectid = dback->root;
8860         key.type = BTRFS_ROOT_ITEM_KEY;
8861         key.offset = (u64)-1;
8862         root = btrfs_read_fs_root(info, &key);
8863         if (IS_ERR(root)) {
8864                 fprintf(stderr, "Couldn't find root for our ref\n");
8865                 return -EINVAL;
8866         }
8867
8868         /*
8869          * The backref points to the original offset of the extent if it was
8870          * split, so we need to search down to the offset we have and then walk
8871          * forward until we find the backref we're looking for.
8872          */
8873         key.objectid = dback->owner;
8874         key.type = BTRFS_EXTENT_DATA_KEY;
8875         key.offset = dback->offset;
8876         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8877         if (ret < 0) {
8878                 fprintf(stderr, "Error looking up ref %d\n", ret);
8879                 return ret;
8880         }
8881
8882         while (1) {
8883                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8884                         ret = btrfs_next_leaf(root, path);
8885                         if (ret) {
8886                                 fprintf(stderr, "Couldn't find our ref, next\n");
8887                                 return -EINVAL;
8888                         }
8889                 }
8890                 leaf = path->nodes[0];
8891                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8892                 if (key.objectid != dback->owner ||
8893                     key.type != BTRFS_EXTENT_DATA_KEY) {
8894                         fprintf(stderr, "Couldn't find our ref, search\n");
8895                         return -EINVAL;
8896                 }
8897                 fi = btrfs_item_ptr(leaf, path->slots[0],
8898                                     struct btrfs_file_extent_item);
8899                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8900                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8901
8902                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8903                         break;
8904                 path->slots[0]++;
8905         }
8906
8907         btrfs_release_path(path);
8908
8909         trans = btrfs_start_transaction(root, 1);
8910         if (IS_ERR(trans))
8911                 return PTR_ERR(trans);
8912
8913         /*
8914          * Ok we have the key of the file extent we want to fix, now we can cow
8915          * down to the thing and fix it.
8916          */
8917         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8918         if (ret < 0) {
8919                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8920                         key.objectid, key.type, key.offset, ret);
8921                 goto out;
8922         }
8923         if (ret > 0) {
8924                 fprintf(stderr, "Well that's odd, we just found this key "
8925                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8926                         key.offset);
8927                 ret = -EINVAL;
8928                 goto out;
8929         }
8930         leaf = path->nodes[0];
8931         fi = btrfs_item_ptr(leaf, path->slots[0],
8932                             struct btrfs_file_extent_item);
8933
8934         if (btrfs_file_extent_compression(leaf, fi) &&
8935             dback->disk_bytenr != entry->bytenr) {
8936                 fprintf(stderr, "Ref doesn't match the record start and is "
8937                         "compressed, please take a btrfs-image of this file "
8938                         "system and send it to a btrfs developer so they can "
8939                         "complete this functionality for bytenr %Lu\n",
8940                         dback->disk_bytenr);
8941                 ret = -EINVAL;
8942                 goto out;
8943         }
8944
8945         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8946                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8947         } else if (dback->disk_bytenr > entry->bytenr) {
8948                 u64 off_diff, offset;
8949
8950                 off_diff = dback->disk_bytenr - entry->bytenr;
8951                 offset = btrfs_file_extent_offset(leaf, fi);
8952                 if (dback->disk_bytenr + offset +
8953                     btrfs_file_extent_num_bytes(leaf, fi) >
8954                     entry->bytenr + entry->bytes) {
8955                         fprintf(stderr, "Ref is past the entry end, please "
8956                                 "take a btrfs-image of this file system and "
8957                                 "send it to a btrfs developer, ref %Lu\n",
8958                                 dback->disk_bytenr);
8959                         ret = -EINVAL;
8960                         goto out;
8961                 }
8962                 offset += off_diff;
8963                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8964                 btrfs_set_file_extent_offset(leaf, fi, offset);
8965         } else if (dback->disk_bytenr < entry->bytenr) {
8966                 u64 offset;
8967
8968                 offset = btrfs_file_extent_offset(leaf, fi);
8969                 if (dback->disk_bytenr + offset < entry->bytenr) {
8970                         fprintf(stderr, "Ref is before the entry start, please"
8971                                 " take a btrfs-image of this file system and "
8972                                 "send it to a btrfs developer, ref %Lu\n",
8973                                 dback->disk_bytenr);
8974                         ret = -EINVAL;
8975                         goto out;
8976                 }
8977
8978                 offset += dback->disk_bytenr;
8979                 offset -= entry->bytenr;
8980                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8981                 btrfs_set_file_extent_offset(leaf, fi, offset);
8982         }
8983
8984         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8985
8986         /*
8987          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8988          * only do this if we aren't using compression, otherwise it's a
8989          * trickier case.
8990          */
8991         if (!btrfs_file_extent_compression(leaf, fi))
8992                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8993         else
8994                 printf("ram bytes may be wrong?\n");
8995         btrfs_mark_buffer_dirty(leaf);
8996 out:
8997         err = btrfs_commit_transaction(trans, root);
8998         btrfs_release_path(path);
8999         return ret ? ret : err;
9000 }
9001
9002 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9003                            struct extent_record *rec)
9004 {
9005         struct extent_backref *back, *tmp;
9006         struct data_backref *dback;
9007         struct extent_entry *entry, *best = NULL;
9008         LIST_HEAD(entries);
9009         int nr_entries = 0;
9010         int broken_entries = 0;
9011         int ret = 0;
9012         short mismatch = 0;
9013
9014         /*
9015          * Metadata is easy and the backrefs should always agree on bytenr and
9016          * size, if not we've got bigger issues.
9017          */
9018         if (rec->metadata)
9019                 return 0;
9020
9021         rbtree_postorder_for_each_entry_safe(back, tmp,
9022                                              &rec->backref_tree, node) {
9023                 if (back->full_backref || !back->is_data)
9024                         continue;
9025
9026                 dback = to_data_backref(back);
9027
9028                 /*
9029                  * We only pay attention to backrefs that we found a real
9030                  * backref for.
9031                  */
9032                 if (dback->found_ref == 0)
9033                         continue;
9034
9035                 /*
9036                  * For now we only catch when the bytes don't match, not the
9037                  * bytenr.  We can easily do this at the same time, but I want
9038                  * to have a fs image to test on before we just add repair
9039                  * functionality willy-nilly so we know we won't screw up the
9040                  * repair.
9041                  */
9042
9043                 entry = find_entry(&entries, dback->disk_bytenr,
9044                                    dback->bytes);
9045                 if (!entry) {
9046                         entry = malloc(sizeof(struct extent_entry));
9047                         if (!entry) {
9048                                 ret = -ENOMEM;
9049                                 goto out;
9050                         }
9051                         memset(entry, 0, sizeof(*entry));
9052                         entry->bytenr = dback->disk_bytenr;
9053                         entry->bytes = dback->bytes;
9054                         list_add_tail(&entry->list, &entries);
9055                         nr_entries++;
9056                 }
9057
9058                 /*
9059                  * If we only have on entry we may think the entries agree when
9060                  * in reality they don't so we have to do some extra checking.
9061                  */
9062                 if (dback->disk_bytenr != rec->start ||
9063                     dback->bytes != rec->nr || back->broken)
9064                         mismatch = 1;
9065
9066                 if (back->broken) {
9067                         entry->broken++;
9068                         broken_entries++;
9069                 }
9070
9071                 entry->count++;
9072         }
9073
9074         /* Yay all the backrefs agree, carry on good sir */
9075         if (nr_entries <= 1 && !mismatch)
9076                 goto out;
9077
9078         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9079                 "%Lu\n", rec->start);
9080
9081         /*
9082          * First we want to see if the backrefs can agree amongst themselves who
9083          * is right, so figure out which one of the entries has the highest
9084          * count.
9085          */
9086         best = find_most_right_entry(&entries);
9087
9088         /*
9089          * Ok so we may have an even split between what the backrefs think, so
9090          * this is where we use the extent ref to see what it thinks.
9091          */
9092         if (!best) {
9093                 entry = find_entry(&entries, rec->start, rec->nr);
9094                 if (!entry && (!broken_entries || !rec->found_rec)) {
9095                         fprintf(stderr, "Backrefs don't agree with each other "
9096                                 "and extent record doesn't agree with anybody,"
9097                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9098                                 rec->start, rec->nr);
9099                         ret = -EINVAL;
9100                         goto out;
9101                 } else if (!entry) {
9102                         /*
9103                          * Ok our backrefs were broken, we'll assume this is the
9104                          * correct value and add an entry for this range.
9105                          */
9106                         entry = malloc(sizeof(struct extent_entry));
9107                         if (!entry) {
9108                                 ret = -ENOMEM;
9109                                 goto out;
9110                         }
9111                         memset(entry, 0, sizeof(*entry));
9112                         entry->bytenr = rec->start;
9113                         entry->bytes = rec->nr;
9114                         list_add_tail(&entry->list, &entries);
9115                         nr_entries++;
9116                 }
9117                 entry->count++;
9118                 best = find_most_right_entry(&entries);
9119                 if (!best) {
9120                         fprintf(stderr, "Backrefs and extent record evenly "
9121                                 "split on who is right, this is going to "
9122                                 "require user input to fix bytenr %Lu bytes "
9123                                 "%Lu\n", rec->start, rec->nr);
9124                         ret = -EINVAL;
9125                         goto out;
9126                 }
9127         }
9128
9129         /*
9130          * I don't think this can happen currently as we'll abort() if we catch
9131          * this case higher up, but in case somebody removes that we still can't
9132          * deal with it properly here yet, so just bail out of that's the case.
9133          */
9134         if (best->bytenr != rec->start) {
9135                 fprintf(stderr, "Extent start and backref starts don't match, "
9136                         "please use btrfs-image on this file system and send "
9137                         "it to a btrfs developer so they can make fsck fix "
9138                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9139                         rec->start, rec->nr);
9140                 ret = -EINVAL;
9141                 goto out;
9142         }
9143
9144         /*
9145          * Ok great we all agreed on an extent record, let's go find the real
9146          * references and fix up the ones that don't match.
9147          */
9148         rbtree_postorder_for_each_entry_safe(back, tmp,
9149                                              &rec->backref_tree, node) {
9150                 if (back->full_backref || !back->is_data)
9151                         continue;
9152
9153                 dback = to_data_backref(back);
9154
9155                 /*
9156                  * Still ignoring backrefs that don't have a real ref attached
9157                  * to them.
9158                  */
9159                 if (dback->found_ref == 0)
9160                         continue;
9161
9162                 if (dback->bytes == best->bytes &&
9163                     dback->disk_bytenr == best->bytenr)
9164                         continue;
9165
9166                 ret = repair_ref(info, path, dback, best);
9167                 if (ret)
9168                         goto out;
9169         }
9170
9171         /*
9172          * Ok we messed with the actual refs, which means we need to drop our
9173          * entire cache and go back and rescan.  I know this is a huge pain and
9174          * adds a lot of extra work, but it's the only way to be safe.  Once all
9175          * the backrefs agree we may not need to do anything to the extent
9176          * record itself.
9177          */
9178         ret = -EAGAIN;
9179 out:
9180         while (!list_empty(&entries)) {
9181                 entry = list_entry(entries.next, struct extent_entry, list);
9182                 list_del_init(&entry->list);
9183                 free(entry);
9184         }
9185         return ret;
9186 }
9187
9188 static int process_duplicates(struct cache_tree *extent_cache,
9189                               struct extent_record *rec)
9190 {
9191         struct extent_record *good, *tmp;
9192         struct cache_extent *cache;
9193         int ret;
9194
9195         /*
9196          * If we found a extent record for this extent then return, or if we
9197          * have more than one duplicate we are likely going to need to delete
9198          * something.
9199          */
9200         if (rec->found_rec || rec->num_duplicates > 1)
9201                 return 0;
9202
9203         /* Shouldn't happen but just in case */
9204         BUG_ON(!rec->num_duplicates);
9205
9206         /*
9207          * So this happens if we end up with a backref that doesn't match the
9208          * actual extent entry.  So either the backref is bad or the extent
9209          * entry is bad.  Either way we want to have the extent_record actually
9210          * reflect what we found in the extent_tree, so we need to take the
9211          * duplicate out and use that as the extent_record since the only way we
9212          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9213          */
9214         remove_cache_extent(extent_cache, &rec->cache);
9215
9216         good = to_extent_record(rec->dups.next);
9217         list_del_init(&good->list);
9218         INIT_LIST_HEAD(&good->backrefs);
9219         INIT_LIST_HEAD(&good->dups);
9220         good->cache.start = good->start;
9221         good->cache.size = good->nr;
9222         good->content_checked = 0;
9223         good->owner_ref_checked = 0;
9224         good->num_duplicates = 0;
9225         good->refs = rec->refs;
9226         list_splice_init(&rec->backrefs, &good->backrefs);
9227         while (1) {
9228                 cache = lookup_cache_extent(extent_cache, good->start,
9229                                             good->nr);
9230                 if (!cache)
9231                         break;
9232                 tmp = container_of(cache, struct extent_record, cache);
9233
9234                 /*
9235                  * If we find another overlapping extent and it's found_rec is
9236                  * set then it's a duplicate and we need to try and delete
9237                  * something.
9238                  */
9239                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9240                         if (list_empty(&good->list))
9241                                 list_add_tail(&good->list,
9242                                               &duplicate_extents);
9243                         good->num_duplicates += tmp->num_duplicates + 1;
9244                         list_splice_init(&tmp->dups, &good->dups);
9245                         list_del_init(&tmp->list);
9246                         list_add_tail(&tmp->list, &good->dups);
9247                         remove_cache_extent(extent_cache, &tmp->cache);
9248                         continue;
9249                 }
9250
9251                 /*
9252                  * Ok we have another non extent item backed extent rec, so lets
9253                  * just add it to this extent and carry on like we did above.
9254                  */
9255                 good->refs += tmp->refs;
9256                 list_splice_init(&tmp->backrefs, &good->backrefs);
9257                 remove_cache_extent(extent_cache, &tmp->cache);
9258                 free(tmp);
9259         }
9260         ret = insert_cache_extent(extent_cache, &good->cache);
9261         BUG_ON(ret);
9262         free(rec);
9263         return good->num_duplicates ? 0 : 1;
9264 }
9265
9266 static int delete_duplicate_records(struct btrfs_root *root,
9267                                     struct extent_record *rec)
9268 {
9269         struct btrfs_trans_handle *trans;
9270         LIST_HEAD(delete_list);
9271         struct btrfs_path path;
9272         struct extent_record *tmp, *good, *n;
9273         int nr_del = 0;
9274         int ret = 0, err;
9275         struct btrfs_key key;
9276
9277         btrfs_init_path(&path);
9278
9279         good = rec;
9280         /* Find the record that covers all of the duplicates. */
9281         list_for_each_entry(tmp, &rec->dups, list) {
9282                 if (good->start < tmp->start)
9283                         continue;
9284                 if (good->nr > tmp->nr)
9285                         continue;
9286
9287                 if (tmp->start + tmp->nr < good->start + good->nr) {
9288                         fprintf(stderr, "Ok we have overlapping extents that "
9289                                 "aren't completely covered by each other, this "
9290                                 "is going to require more careful thought.  "
9291                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9292                                 tmp->start, tmp->nr, good->start, good->nr);
9293                         abort();
9294                 }
9295                 good = tmp;
9296         }
9297
9298         if (good != rec)
9299                 list_add_tail(&rec->list, &delete_list);
9300
9301         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9302                 if (tmp == good)
9303                         continue;
9304                 list_move_tail(&tmp->list, &delete_list);
9305         }
9306
9307         root = root->fs_info->extent_root;
9308         trans = btrfs_start_transaction(root, 1);
9309         if (IS_ERR(trans)) {
9310                 ret = PTR_ERR(trans);
9311                 goto out;
9312         }
9313
9314         list_for_each_entry(tmp, &delete_list, list) {
9315                 if (tmp->found_rec == 0)
9316                         continue;
9317                 key.objectid = tmp->start;
9318                 key.type = BTRFS_EXTENT_ITEM_KEY;
9319                 key.offset = tmp->nr;
9320
9321                 /* Shouldn't happen but just in case */
9322                 if (tmp->metadata) {
9323                         fprintf(stderr, "Well this shouldn't happen, extent "
9324                                 "record overlaps but is metadata? "
9325                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9326                         abort();
9327                 }
9328
9329                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9330                 if (ret) {
9331                         if (ret > 0)
9332                                 ret = -EINVAL;
9333                         break;
9334                 }
9335                 ret = btrfs_del_item(trans, root, &path);
9336                 if (ret)
9337                         break;
9338                 btrfs_release_path(&path);
9339                 nr_del++;
9340         }
9341         err = btrfs_commit_transaction(trans, root);
9342         if (err && !ret)
9343                 ret = err;
9344 out:
9345         while (!list_empty(&delete_list)) {
9346                 tmp = to_extent_record(delete_list.next);
9347                 list_del_init(&tmp->list);
9348                 if (tmp == rec)
9349                         continue;
9350                 free(tmp);
9351         }
9352
9353         while (!list_empty(&rec->dups)) {
9354                 tmp = to_extent_record(rec->dups.next);
9355                 list_del_init(&tmp->list);
9356                 free(tmp);
9357         }
9358
9359         btrfs_release_path(&path);
9360
9361         if (!ret && !nr_del)
9362                 rec->num_duplicates = 0;
9363
9364         return ret ? ret : nr_del;
9365 }
9366
9367 static int find_possible_backrefs(struct btrfs_fs_info *info,
9368                                   struct btrfs_path *path,
9369                                   struct cache_tree *extent_cache,
9370                                   struct extent_record *rec)
9371 {
9372         struct btrfs_root *root;
9373         struct extent_backref *back, *tmp;
9374         struct data_backref *dback;
9375         struct cache_extent *cache;
9376         struct btrfs_file_extent_item *fi;
9377         struct btrfs_key key;
9378         u64 bytenr, bytes;
9379         int ret;
9380
9381         rbtree_postorder_for_each_entry_safe(back, tmp,
9382                                              &rec->backref_tree, node) {
9383                 /* Don't care about full backrefs (poor unloved backrefs) */
9384                 if (back->full_backref || !back->is_data)
9385                         continue;
9386
9387                 dback = to_data_backref(back);
9388
9389                 /* We found this one, we don't need to do a lookup */
9390                 if (dback->found_ref)
9391                         continue;
9392
9393                 key.objectid = dback->root;
9394                 key.type = BTRFS_ROOT_ITEM_KEY;
9395                 key.offset = (u64)-1;
9396
9397                 root = btrfs_read_fs_root(info, &key);
9398
9399                 /* No root, definitely a bad ref, skip */
9400                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9401                         continue;
9402                 /* Other err, exit */
9403                 if (IS_ERR(root))
9404                         return PTR_ERR(root);
9405
9406                 key.objectid = dback->owner;
9407                 key.type = BTRFS_EXTENT_DATA_KEY;
9408                 key.offset = dback->offset;
9409                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9410                 if (ret) {
9411                         btrfs_release_path(path);
9412                         if (ret < 0)
9413                                 return ret;
9414                         /* Didn't find it, we can carry on */
9415                         ret = 0;
9416                         continue;
9417                 }
9418
9419                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9420                                     struct btrfs_file_extent_item);
9421                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9422                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9423                 btrfs_release_path(path);
9424                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9425                 if (cache) {
9426                         struct extent_record *tmp;
9427                         tmp = container_of(cache, struct extent_record, cache);
9428
9429                         /*
9430                          * If we found an extent record for the bytenr for this
9431                          * particular backref then we can't add it to our
9432                          * current extent record.  We only want to add backrefs
9433                          * that don't have a corresponding extent item in the
9434                          * extent tree since they likely belong to this record
9435                          * and we need to fix it if it doesn't match bytenrs.
9436                          */
9437                         if  (tmp->found_rec)
9438                                 continue;
9439                 }
9440
9441                 dback->found_ref += 1;
9442                 dback->disk_bytenr = bytenr;
9443                 dback->bytes = bytes;
9444
9445                 /*
9446                  * Set this so the verify backref code knows not to trust the
9447                  * values in this backref.
9448                  */
9449                 back->broken = 1;
9450         }
9451
9452         return 0;
9453 }
9454
9455 /*
9456  * Record orphan data ref into corresponding root.
9457  *
9458  * Return 0 if the extent item contains data ref and recorded.
9459  * Return 1 if the extent item contains no useful data ref
9460  *   On that case, it may contains only shared_dataref or metadata backref
9461  *   or the file extent exists(this should be handled by the extent bytenr
9462  *   recovery routine)
9463  * Return <0 if something goes wrong.
9464  */
9465 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9466                                       struct extent_record *rec)
9467 {
9468         struct btrfs_key key;
9469         struct btrfs_root *dest_root;
9470         struct extent_backref *back, *tmp;
9471         struct data_backref *dback;
9472         struct orphan_data_extent *orphan;
9473         struct btrfs_path path;
9474         int recorded_data_ref = 0;
9475         int ret = 0;
9476
9477         if (rec->metadata)
9478                 return 1;
9479         btrfs_init_path(&path);
9480         rbtree_postorder_for_each_entry_safe(back, tmp,
9481                                              &rec->backref_tree, node) {
9482                 if (back->full_backref || !back->is_data ||
9483                     !back->found_extent_tree)
9484                         continue;
9485                 dback = to_data_backref(back);
9486                 if (dback->found_ref)
9487                         continue;
9488                 key.objectid = dback->root;
9489                 key.type = BTRFS_ROOT_ITEM_KEY;
9490                 key.offset = (u64)-1;
9491
9492                 dest_root = btrfs_read_fs_root(fs_info, &key);
9493
9494                 /* For non-exist root we just skip it */
9495                 if (IS_ERR(dest_root) || !dest_root)
9496                         continue;
9497
9498                 key.objectid = dback->owner;
9499                 key.type = BTRFS_EXTENT_DATA_KEY;
9500                 key.offset = dback->offset;
9501
9502                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9503                 btrfs_release_path(&path);
9504                 /*
9505                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9506                  * we need to record it for inode/file extent rebuild.
9507                  * For ret > 0, we record it only for file extent rebuild.
9508                  * For ret == 0, the file extent exists but only bytenr
9509                  * mismatch, let the original bytenr fix routine to handle,
9510                  * don't record it.
9511                  */
9512                 if (ret == 0)
9513                         continue;
9514                 ret = 0;
9515                 orphan = malloc(sizeof(*orphan));
9516                 if (!orphan) {
9517                         ret = -ENOMEM;
9518                         goto out;
9519                 }
9520                 INIT_LIST_HEAD(&orphan->list);
9521                 orphan->root = dback->root;
9522                 orphan->objectid = dback->owner;
9523                 orphan->offset = dback->offset;
9524                 orphan->disk_bytenr = rec->cache.start;
9525                 orphan->disk_len = rec->cache.size;
9526                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9527                 recorded_data_ref = 1;
9528         }
9529 out:
9530         btrfs_release_path(&path);
9531         if (!ret)
9532                 return !recorded_data_ref;
9533         else
9534                 return ret;
9535 }
9536
9537 /*
9538  * when an incorrect extent item is found, this will delete
9539  * all of the existing entries for it and recreate them
9540  * based on what the tree scan found.
9541  */
9542 static int fixup_extent_refs(struct btrfs_fs_info *info,
9543                              struct cache_tree *extent_cache,
9544                              struct extent_record *rec)
9545 {
9546         struct btrfs_trans_handle *trans = NULL;
9547         int ret;
9548         struct btrfs_path path;
9549         struct cache_extent *cache;
9550         struct extent_backref *back, *tmp;
9551         int allocated = 0;
9552         u64 flags = 0;
9553
9554         if (rec->flag_block_full_backref)
9555                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9556
9557         btrfs_init_path(&path);
9558         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9559                 /*
9560                  * Sometimes the backrefs themselves are so broken they don't
9561                  * get attached to any meaningful rec, so first go back and
9562                  * check any of our backrefs that we couldn't find and throw
9563                  * them into the list if we find the backref so that
9564                  * verify_backrefs can figure out what to do.
9565                  */
9566                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9567                 if (ret < 0)
9568                         goto out;
9569         }
9570
9571         /* step one, make sure all of the backrefs agree */
9572         ret = verify_backrefs(info, &path, rec);
9573         if (ret < 0)
9574                 goto out;
9575
9576         trans = btrfs_start_transaction(info->extent_root, 1);
9577         if (IS_ERR(trans)) {
9578                 ret = PTR_ERR(trans);
9579                 goto out;
9580         }
9581
9582         /* step two, delete all the existing records */
9583         ret = delete_extent_records(trans, info->extent_root, &path,
9584                                     rec->start);
9585
9586         if (ret < 0)
9587                 goto out;
9588
9589         /* was this block corrupt?  If so, don't add references to it */
9590         cache = lookup_cache_extent(info->corrupt_blocks,
9591                                     rec->start, rec->max_size);
9592         if (cache) {
9593                 ret = 0;
9594                 goto out;
9595         }
9596
9597         /* step three, recreate all the refs we did find */
9598         rbtree_postorder_for_each_entry_safe(back, tmp,
9599                                              &rec->backref_tree, node) {
9600                 /*
9601                  * if we didn't find any references, don't create a
9602                  * new extent record
9603                  */
9604                 if (!back->found_ref)
9605                         continue;
9606
9607                 rec->bad_full_backref = 0;
9608                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9609                 allocated = 1;
9610
9611                 if (ret)
9612                         goto out;
9613         }
9614 out:
9615         if (trans) {
9616                 int err = btrfs_commit_transaction(trans, info->extent_root);
9617                 if (!ret)
9618                         ret = err;
9619         }
9620
9621         if (!ret)
9622                 fprintf(stderr, "Repaired extent references for %llu\n",
9623                                 (unsigned long long)rec->start);
9624
9625         btrfs_release_path(&path);
9626         return ret;
9627 }
9628
9629 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9630                               struct extent_record *rec)
9631 {
9632         struct btrfs_trans_handle *trans;
9633         struct btrfs_root *root = fs_info->extent_root;
9634         struct btrfs_path path;
9635         struct btrfs_extent_item *ei;
9636         struct btrfs_key key;
9637         u64 flags;
9638         int ret = 0;
9639
9640         key.objectid = rec->start;
9641         if (rec->metadata) {
9642                 key.type = BTRFS_METADATA_ITEM_KEY;
9643                 key.offset = rec->info_level;
9644         } else {
9645                 key.type = BTRFS_EXTENT_ITEM_KEY;
9646                 key.offset = rec->max_size;
9647         }
9648
9649         trans = btrfs_start_transaction(root, 0);
9650         if (IS_ERR(trans))
9651                 return PTR_ERR(trans);
9652
9653         btrfs_init_path(&path);
9654         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9655         if (ret < 0) {
9656                 btrfs_release_path(&path);
9657                 btrfs_commit_transaction(trans, root);
9658                 return ret;
9659         } else if (ret) {
9660                 fprintf(stderr, "Didn't find extent for %llu\n",
9661                         (unsigned long long)rec->start);
9662                 btrfs_release_path(&path);
9663                 btrfs_commit_transaction(trans, root);
9664                 return -ENOENT;
9665         }
9666
9667         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9668                             struct btrfs_extent_item);
9669         flags = btrfs_extent_flags(path.nodes[0], ei);
9670         if (rec->flag_block_full_backref) {
9671                 fprintf(stderr, "setting full backref on %llu\n",
9672                         (unsigned long long)key.objectid);
9673                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9674         } else {
9675                 fprintf(stderr, "clearing full backref on %llu\n",
9676                         (unsigned long long)key.objectid);
9677                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9678         }
9679         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9680         btrfs_mark_buffer_dirty(path.nodes[0]);
9681         btrfs_release_path(&path);
9682         ret = btrfs_commit_transaction(trans, root);
9683         if (!ret)
9684                 fprintf(stderr, "Repaired extent flags for %llu\n",
9685                                 (unsigned long long)rec->start);
9686
9687         return ret;
9688 }
9689
9690 /* right now we only prune from the extent allocation tree */
9691 static int prune_one_block(struct btrfs_trans_handle *trans,
9692                            struct btrfs_fs_info *info,
9693                            struct btrfs_corrupt_block *corrupt)
9694 {
9695         int ret;
9696         struct btrfs_path path;
9697         struct extent_buffer *eb;
9698         u64 found;
9699         int slot;
9700         int nritems;
9701         int level = corrupt->level + 1;
9702
9703         btrfs_init_path(&path);
9704 again:
9705         /* we want to stop at the parent to our busted block */
9706         path.lowest_level = level;
9707
9708         ret = btrfs_search_slot(trans, info->extent_root,
9709                                 &corrupt->key, &path, -1, 1);
9710
9711         if (ret < 0)
9712                 goto out;
9713
9714         eb = path.nodes[level];
9715         if (!eb) {
9716                 ret = -ENOENT;
9717                 goto out;
9718         }
9719
9720         /*
9721          * hopefully the search gave us the block we want to prune,
9722          * lets try that first
9723          */
9724         slot = path.slots[level];
9725         found =  btrfs_node_blockptr(eb, slot);
9726         if (found == corrupt->cache.start)
9727                 goto del_ptr;
9728
9729         nritems = btrfs_header_nritems(eb);
9730
9731         /* the search failed, lets scan this node and hope we find it */
9732         for (slot = 0; slot < nritems; slot++) {
9733                 found =  btrfs_node_blockptr(eb, slot);
9734                 if (found == corrupt->cache.start)
9735                         goto del_ptr;
9736         }
9737         /*
9738          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9739          * to this block
9740          */
9741         if (eb == info->extent_root->node) {
9742                 ret = -ENOENT;
9743                 goto out;
9744         } else {
9745                 level++;
9746                 btrfs_release_path(&path);
9747                 goto again;
9748         }
9749
9750 del_ptr:
9751         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9752         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9753
9754 out:
9755         btrfs_release_path(&path);
9756         return ret;
9757 }
9758
9759 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9760 {
9761         struct btrfs_trans_handle *trans = NULL;
9762         struct cache_extent *cache;
9763         struct btrfs_corrupt_block *corrupt;
9764
9765         while (1) {
9766                 cache = search_cache_extent(info->corrupt_blocks, 0);
9767                 if (!cache)
9768                         break;
9769                 if (!trans) {
9770                         trans = btrfs_start_transaction(info->extent_root, 1);
9771                         if (IS_ERR(trans))
9772                                 return PTR_ERR(trans);
9773                 }
9774                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9775                 prune_one_block(trans, info, corrupt);
9776                 remove_cache_extent(info->corrupt_blocks, cache);
9777         }
9778         if (trans)
9779                 return btrfs_commit_transaction(trans, info->extent_root);
9780         return 0;
9781 }
9782
9783 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9784 {
9785         struct btrfs_block_group_cache *cache;
9786         u64 start, end;
9787         int ret;
9788
9789         while (1) {
9790                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9791                                             &start, &end, EXTENT_DIRTY);
9792                 if (ret)
9793                         break;
9794                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9795         }
9796
9797         start = 0;
9798         while (1) {
9799                 cache = btrfs_lookup_first_block_group(fs_info, start);
9800                 if (!cache)
9801                         break;
9802                 if (cache->cached)
9803                         cache->cached = 0;
9804                 start = cache->key.objectid + cache->key.offset;
9805         }
9806 }
9807
9808 static int check_extent_refs(struct btrfs_root *root,
9809                              struct cache_tree *extent_cache)
9810 {
9811         struct extent_record *rec;
9812         struct cache_extent *cache;
9813         int ret = 0;
9814         int had_dups = 0;
9815
9816         if (repair) {
9817                 /*
9818                  * if we're doing a repair, we have to make sure
9819                  * we don't allocate from the problem extents.
9820                  * In the worst case, this will be all the
9821                  * extents in the FS
9822                  */
9823                 cache = search_cache_extent(extent_cache, 0);
9824                 while(cache) {
9825                         rec = container_of(cache, struct extent_record, cache);
9826                         set_extent_dirty(root->fs_info->excluded_extents,
9827                                          rec->start,
9828                                          rec->start + rec->max_size - 1);
9829                         cache = next_cache_extent(cache);
9830                 }
9831
9832                 /* pin down all the corrupted blocks too */
9833                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9834                 while(cache) {
9835                         set_extent_dirty(root->fs_info->excluded_extents,
9836                                          cache->start,
9837                                          cache->start + cache->size - 1);
9838                         cache = next_cache_extent(cache);
9839                 }
9840                 prune_corrupt_blocks(root->fs_info);
9841                 reset_cached_block_groups(root->fs_info);
9842         }
9843
9844         reset_cached_block_groups(root->fs_info);
9845
9846         /*
9847          * We need to delete any duplicate entries we find first otherwise we
9848          * could mess up the extent tree when we have backrefs that actually
9849          * belong to a different extent item and not the weird duplicate one.
9850          */
9851         while (repair && !list_empty(&duplicate_extents)) {
9852                 rec = to_extent_record(duplicate_extents.next);
9853                 list_del_init(&rec->list);
9854
9855                 /* Sometimes we can find a backref before we find an actual
9856                  * extent, so we need to process it a little bit to see if there
9857                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9858                  * if this is a backref screwup.  If we need to delete stuff
9859                  * process_duplicates() will return 0, otherwise it will return
9860                  * 1 and we
9861                  */
9862                 if (process_duplicates(extent_cache, rec))
9863                         continue;
9864                 ret = delete_duplicate_records(root, rec);
9865                 if (ret < 0)
9866                         return ret;
9867                 /*
9868                  * delete_duplicate_records will return the number of entries
9869                  * deleted, so if it's greater than 0 then we know we actually
9870                  * did something and we need to remove.
9871                  */
9872                 if (ret)
9873                         had_dups = 1;
9874         }
9875
9876         if (had_dups)
9877                 return -EAGAIN;
9878
9879         while(1) {
9880                 int cur_err = 0;
9881                 int fix = 0;
9882
9883                 cache = search_cache_extent(extent_cache, 0);
9884                 if (!cache)
9885                         break;
9886                 rec = container_of(cache, struct extent_record, cache);
9887                 if (rec->num_duplicates) {
9888                         fprintf(stderr, "extent item %llu has multiple extent "
9889                                 "items\n", (unsigned long long)rec->start);
9890                         cur_err = 1;
9891                 }
9892
9893                 if (rec->refs != rec->extent_item_refs) {
9894                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9895                                 (unsigned long long)rec->start,
9896                                 (unsigned long long)rec->nr);
9897                         fprintf(stderr, "extent item %llu, found %llu\n",
9898                                 (unsigned long long)rec->extent_item_refs,
9899                                 (unsigned long long)rec->refs);
9900                         ret = record_orphan_data_extents(root->fs_info, rec);
9901                         if (ret < 0)
9902                                 goto repair_abort;
9903                         fix = ret;
9904                         cur_err = 1;
9905                 }
9906                 if (all_backpointers_checked(rec, 1)) {
9907                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9908                                 (unsigned long long)rec->start,
9909                                 (unsigned long long)rec->nr);
9910                         fix = 1;
9911                         cur_err = 1;
9912                 }
9913                 if (!rec->owner_ref_checked) {
9914                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9915                                 (unsigned long long)rec->start,
9916                                 (unsigned long long)rec->nr);
9917                         fix = 1;
9918                         cur_err = 1;
9919                 }
9920
9921                 if (repair && fix) {
9922                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9923                         if (ret)
9924                                 goto repair_abort;
9925                 }
9926
9927
9928                 if (rec->bad_full_backref) {
9929                         fprintf(stderr, "bad full backref, on [%llu]\n",
9930                                 (unsigned long long)rec->start);
9931                         if (repair) {
9932                                 ret = fixup_extent_flags(root->fs_info, rec);
9933                                 if (ret)
9934                                         goto repair_abort;
9935                                 fix = 1;
9936                         }
9937                         cur_err = 1;
9938                 }
9939                 /*
9940                  * Although it's not a extent ref's problem, we reuse this
9941                  * routine for error reporting.
9942                  * No repair function yet.
9943                  */
9944                 if (rec->crossing_stripes) {
9945                         fprintf(stderr,
9946                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9947                                 rec->start, rec->start + rec->max_size);
9948                         cur_err = 1;
9949                 }
9950
9951                 if (rec->wrong_chunk_type) {
9952                         fprintf(stderr,
9953                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9954                                 rec->start, rec->start + rec->max_size);
9955                         cur_err = 1;
9956                 }
9957
9958                 remove_cache_extent(extent_cache, cache);
9959                 free_all_extent_backrefs(rec);
9960                 if (!init_extent_tree && repair && (!cur_err || fix))
9961                         clear_extent_dirty(root->fs_info->excluded_extents,
9962                                            rec->start,
9963                                            rec->start + rec->max_size - 1);
9964                 free(rec);
9965         }
9966 repair_abort:
9967         if (repair) {
9968                 if (ret && ret != -EAGAIN) {
9969                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9970                         exit(1);
9971                 } else if (!ret) {
9972                         struct btrfs_trans_handle *trans;
9973
9974                         root = root->fs_info->extent_root;
9975                         trans = btrfs_start_transaction(root, 1);
9976                         if (IS_ERR(trans)) {
9977                                 ret = PTR_ERR(trans);
9978                                 goto repair_abort;
9979                         }
9980
9981                         ret = btrfs_fix_block_accounting(trans, root);
9982                         if (ret)
9983                                 goto repair_abort;
9984                         ret = btrfs_commit_transaction(trans, root);
9985                         if (ret)
9986                                 goto repair_abort;
9987                 }
9988                 return ret;
9989         }
9990         return 0;
9991 }
9992
9993 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9994 {
9995         u64 stripe_size;
9996
9997         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9998                 stripe_size = length;
9999                 stripe_size /= num_stripes;
10000         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10001                 stripe_size = length * 2;
10002                 stripe_size /= num_stripes;
10003         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10004                 stripe_size = length;
10005                 stripe_size /= (num_stripes - 1);
10006         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10007                 stripe_size = length;
10008                 stripe_size /= (num_stripes - 2);
10009         } else {
10010                 stripe_size = length;
10011         }
10012         return stripe_size;
10013 }
10014
10015 /*
10016  * Check the chunk with its block group/dev list ref:
10017  * Return 0 if all refs seems valid.
10018  * Return 1 if part of refs seems valid, need later check for rebuild ref
10019  * like missing block group and needs to search extent tree to rebuild them.
10020  * Return -1 if essential refs are missing and unable to rebuild.
10021  */
10022 static int check_chunk_refs(struct chunk_record *chunk_rec,
10023                             struct block_group_tree *block_group_cache,
10024                             struct device_extent_tree *dev_extent_cache,
10025                             int silent)
10026 {
10027         struct cache_extent *block_group_item;
10028         struct block_group_record *block_group_rec;
10029         struct cache_extent *dev_extent_item;
10030         struct device_extent_record *dev_extent_rec;
10031         u64 devid;
10032         u64 offset;
10033         u64 length;
10034         int metadump_v2 = 0;
10035         int i;
10036         int ret = 0;
10037
10038         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10039                                                chunk_rec->offset,
10040                                                chunk_rec->length);
10041         if (block_group_item) {
10042                 block_group_rec = container_of(block_group_item,
10043                                                struct block_group_record,
10044                                                cache);
10045                 if (chunk_rec->length != block_group_rec->offset ||
10046                     chunk_rec->offset != block_group_rec->objectid ||
10047                     (!metadump_v2 &&
10048                      chunk_rec->type_flags != block_group_rec->flags)) {
10049                         if (!silent)
10050                                 fprintf(stderr,
10051                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10052                                         chunk_rec->objectid,
10053                                         chunk_rec->type,
10054                                         chunk_rec->offset,
10055                                         chunk_rec->length,
10056                                         chunk_rec->offset,
10057                                         chunk_rec->type_flags,
10058                                         block_group_rec->objectid,
10059                                         block_group_rec->type,
10060                                         block_group_rec->offset,
10061                                         block_group_rec->offset,
10062                                         block_group_rec->objectid,
10063                                         block_group_rec->flags);
10064                         ret = -1;
10065                 } else {
10066                         list_del_init(&block_group_rec->list);
10067                         chunk_rec->bg_rec = block_group_rec;
10068                 }
10069         } else {
10070                 if (!silent)
10071                         fprintf(stderr,
10072                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10073                                 chunk_rec->objectid,
10074                                 chunk_rec->type,
10075                                 chunk_rec->offset,
10076                                 chunk_rec->length,
10077                                 chunk_rec->offset,
10078                                 chunk_rec->type_flags);
10079                 ret = 1;
10080         }
10081
10082         if (metadump_v2)
10083                 return ret;
10084
10085         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10086                                     chunk_rec->num_stripes);
10087         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10088                 devid = chunk_rec->stripes[i].devid;
10089                 offset = chunk_rec->stripes[i].offset;
10090                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10091                                                        devid, offset, length);
10092                 if (dev_extent_item) {
10093                         dev_extent_rec = container_of(dev_extent_item,
10094                                                 struct device_extent_record,
10095                                                 cache);
10096                         if (dev_extent_rec->objectid != devid ||
10097                             dev_extent_rec->offset != offset ||
10098                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10099                             dev_extent_rec->length != length) {
10100                                 if (!silent)
10101                                         fprintf(stderr,
10102                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10103                                                 chunk_rec->objectid,
10104                                                 chunk_rec->type,
10105                                                 chunk_rec->offset,
10106                                                 chunk_rec->stripes[i].devid,
10107                                                 chunk_rec->stripes[i].offset,
10108                                                 dev_extent_rec->objectid,
10109                                                 dev_extent_rec->offset,
10110                                                 dev_extent_rec->length);
10111                                 ret = -1;
10112                         } else {
10113                                 list_move(&dev_extent_rec->chunk_list,
10114                                           &chunk_rec->dextents);
10115                         }
10116                 } else {
10117                         if (!silent)
10118                                 fprintf(stderr,
10119                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10120                                         chunk_rec->objectid,
10121                                         chunk_rec->type,
10122                                         chunk_rec->offset,
10123                                         chunk_rec->stripes[i].devid,
10124                                         chunk_rec->stripes[i].offset);
10125                         ret = -1;
10126                 }
10127         }
10128         return ret;
10129 }
10130
10131 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10132 int check_chunks(struct cache_tree *chunk_cache,
10133                  struct block_group_tree *block_group_cache,
10134                  struct device_extent_tree *dev_extent_cache,
10135                  struct list_head *good, struct list_head *bad,
10136                  struct list_head *rebuild, int silent)
10137 {
10138         struct cache_extent *chunk_item;
10139         struct chunk_record *chunk_rec;
10140         struct block_group_record *bg_rec;
10141         struct device_extent_record *dext_rec;
10142         int err;
10143         int ret = 0;
10144
10145         chunk_item = first_cache_extent(chunk_cache);
10146         while (chunk_item) {
10147                 chunk_rec = container_of(chunk_item, struct chunk_record,
10148                                          cache);
10149                 err = check_chunk_refs(chunk_rec, block_group_cache,
10150                                        dev_extent_cache, silent);
10151                 if (err < 0)
10152                         ret = err;
10153                 if (err == 0 && good)
10154                         list_add_tail(&chunk_rec->list, good);
10155                 if (err > 0 && rebuild)
10156                         list_add_tail(&chunk_rec->list, rebuild);
10157                 if (err < 0 && bad)
10158                         list_add_tail(&chunk_rec->list, bad);
10159                 chunk_item = next_cache_extent(chunk_item);
10160         }
10161
10162         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10163                 if (!silent)
10164                         fprintf(stderr,
10165                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10166                                 bg_rec->objectid,
10167                                 bg_rec->offset,
10168                                 bg_rec->flags);
10169                 if (!ret)
10170                         ret = 1;
10171         }
10172
10173         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10174                             chunk_list) {
10175                 if (!silent)
10176                         fprintf(stderr,
10177                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10178                                 dext_rec->objectid,
10179                                 dext_rec->offset,
10180                                 dext_rec->length);
10181                 if (!ret)
10182                         ret = 1;
10183         }
10184         return ret;
10185 }
10186
10187
10188 static int check_device_used(struct device_record *dev_rec,
10189                              struct device_extent_tree *dext_cache)
10190 {
10191         struct cache_extent *cache;
10192         struct device_extent_record *dev_extent_rec;
10193         u64 total_byte = 0;
10194
10195         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10196         while (cache) {
10197                 dev_extent_rec = container_of(cache,
10198                                               struct device_extent_record,
10199                                               cache);
10200                 if (dev_extent_rec->objectid != dev_rec->devid)
10201                         break;
10202
10203                 list_del_init(&dev_extent_rec->device_list);
10204                 total_byte += dev_extent_rec->length;
10205                 cache = next_cache_extent(cache);
10206         }
10207
10208         if (total_byte != dev_rec->byte_used) {
10209                 fprintf(stderr,
10210                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10211                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10212                         dev_rec->type, dev_rec->offset);
10213                 return -1;
10214         } else {
10215                 return 0;
10216         }
10217 }
10218
10219 /* check btrfs_dev_item -> btrfs_dev_extent */
10220 static int check_devices(struct rb_root *dev_cache,
10221                          struct device_extent_tree *dev_extent_cache)
10222 {
10223         struct rb_node *dev_node;
10224         struct device_record *dev_rec;
10225         struct device_extent_record *dext_rec;
10226         int err;
10227         int ret = 0;
10228
10229         dev_node = rb_first(dev_cache);
10230         while (dev_node) {
10231                 dev_rec = container_of(dev_node, struct device_record, node);
10232                 err = check_device_used(dev_rec, dev_extent_cache);
10233                 if (err)
10234                         ret = err;
10235
10236                 dev_node = rb_next(dev_node);
10237         }
10238         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10239                             device_list) {
10240                 fprintf(stderr,
10241                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10242                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10243                 if (!ret)
10244                         ret = 1;
10245         }
10246         return ret;
10247 }
10248
10249 static int add_root_item_to_list(struct list_head *head,
10250                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10251                                   u8 level, u8 drop_level,
10252                                   struct btrfs_key *drop_key)
10253 {
10254
10255         struct root_item_record *ri_rec;
10256         ri_rec = malloc(sizeof(*ri_rec));
10257         if (!ri_rec)
10258                 return -ENOMEM;
10259         ri_rec->bytenr = bytenr;
10260         ri_rec->objectid = objectid;
10261         ri_rec->level = level;
10262         ri_rec->drop_level = drop_level;
10263         ri_rec->last_snapshot = last_snapshot;
10264         if (drop_key)
10265                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10266         list_add_tail(&ri_rec->list, head);
10267
10268         return 0;
10269 }
10270
10271 static void free_root_item_list(struct list_head *list)
10272 {
10273         struct root_item_record *ri_rec;
10274
10275         while (!list_empty(list)) {
10276                 ri_rec = list_first_entry(list, struct root_item_record,
10277                                           list);
10278                 list_del_init(&ri_rec->list);
10279                 free(ri_rec);
10280         }
10281 }
10282
10283 static int deal_root_from_list(struct list_head *list,
10284                                struct btrfs_root *root,
10285                                struct block_info *bits,
10286                                int bits_nr,
10287                                struct cache_tree *pending,
10288                                struct cache_tree *seen,
10289                                struct cache_tree *reada,
10290                                struct cache_tree *nodes,
10291                                struct cache_tree *extent_cache,
10292                                struct cache_tree *chunk_cache,
10293                                struct rb_root *dev_cache,
10294                                struct block_group_tree *block_group_cache,
10295                                struct device_extent_tree *dev_extent_cache)
10296 {
10297         int ret = 0;
10298         u64 last;
10299
10300         while (!list_empty(list)) {
10301                 struct root_item_record *rec;
10302                 struct extent_buffer *buf;
10303                 rec = list_entry(list->next,
10304                                  struct root_item_record, list);
10305                 last = 0;
10306                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10307                 if (!extent_buffer_uptodate(buf)) {
10308                         free_extent_buffer(buf);
10309                         ret = -EIO;
10310                         break;
10311                 }
10312                 ret = add_root_to_pending(buf, extent_cache, pending,
10313                                     seen, nodes, rec->objectid);
10314                 if (ret < 0)
10315                         break;
10316                 /*
10317                  * To rebuild extent tree, we need deal with snapshot
10318                  * one by one, otherwise we deal with node firstly which
10319                  * can maximize readahead.
10320                  */
10321                 while (1) {
10322                         ret = run_next_block(root, bits, bits_nr, &last,
10323                                              pending, seen, reada, nodes,
10324                                              extent_cache, chunk_cache,
10325                                              dev_cache, block_group_cache,
10326                                              dev_extent_cache, rec);
10327                         if (ret != 0)
10328                                 break;
10329                 }
10330                 free_extent_buffer(buf);
10331                 list_del(&rec->list);
10332                 free(rec);
10333                 if (ret < 0)
10334                         break;
10335         }
10336         while (ret >= 0) {
10337                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10338                                      reada, nodes, extent_cache, chunk_cache,
10339                                      dev_cache, block_group_cache,
10340                                      dev_extent_cache, NULL);
10341                 if (ret != 0) {
10342                         if (ret > 0)
10343                                 ret = 0;
10344                         break;
10345                 }
10346         }
10347         return ret;
10348 }
10349
10350 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10351 {
10352         struct rb_root dev_cache;
10353         struct cache_tree chunk_cache;
10354         struct block_group_tree block_group_cache;
10355         struct device_extent_tree dev_extent_cache;
10356         struct cache_tree extent_cache;
10357         struct cache_tree seen;
10358         struct cache_tree pending;
10359         struct cache_tree reada;
10360         struct cache_tree nodes;
10361         struct extent_io_tree excluded_extents;
10362         struct cache_tree corrupt_blocks;
10363         struct btrfs_path path;
10364         struct btrfs_key key;
10365         struct btrfs_key found_key;
10366         int ret, err = 0;
10367         struct block_info *bits;
10368         int bits_nr;
10369         struct extent_buffer *leaf;
10370         int slot;
10371         struct btrfs_root_item ri;
10372         struct list_head dropping_trees;
10373         struct list_head normal_trees;
10374         struct btrfs_root *root1;
10375         struct btrfs_root *root;
10376         u64 objectid;
10377         u8 level;
10378
10379         root = fs_info->fs_root;
10380         dev_cache = RB_ROOT;
10381         cache_tree_init(&chunk_cache);
10382         block_group_tree_init(&block_group_cache);
10383         device_extent_tree_init(&dev_extent_cache);
10384
10385         cache_tree_init(&extent_cache);
10386         cache_tree_init(&seen);
10387         cache_tree_init(&pending);
10388         cache_tree_init(&nodes);
10389         cache_tree_init(&reada);
10390         cache_tree_init(&corrupt_blocks);
10391         extent_io_tree_init(&excluded_extents);
10392         INIT_LIST_HEAD(&dropping_trees);
10393         INIT_LIST_HEAD(&normal_trees);
10394
10395         if (repair) {
10396                 fs_info->excluded_extents = &excluded_extents;
10397                 fs_info->fsck_extent_cache = &extent_cache;
10398                 fs_info->free_extent_hook = free_extent_hook;
10399                 fs_info->corrupt_blocks = &corrupt_blocks;
10400         }
10401
10402         bits_nr = 1024;
10403         bits = malloc(bits_nr * sizeof(struct block_info));
10404         if (!bits) {
10405                 perror("malloc");
10406                 exit(1);
10407         }
10408
10409         if (ctx.progress_enabled) {
10410                 ctx.tp = TASK_EXTENTS;
10411                 task_start(ctx.info);
10412         }
10413
10414 again:
10415         root1 = fs_info->tree_root;
10416         level = btrfs_header_level(root1->node);
10417         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10418                                     root1->node->start, 0, level, 0, NULL);
10419         if (ret < 0)
10420                 goto out;
10421         root1 = fs_info->chunk_root;
10422         level = btrfs_header_level(root1->node);
10423         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10424                                     root1->node->start, 0, level, 0, NULL);
10425         if (ret < 0)
10426                 goto out;
10427         btrfs_init_path(&path);
10428         key.offset = 0;
10429         key.objectid = 0;
10430         key.type = BTRFS_ROOT_ITEM_KEY;
10431         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10432         if (ret < 0)
10433                 goto out;
10434         while(1) {
10435                 leaf = path.nodes[0];
10436                 slot = path.slots[0];
10437                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10438                         ret = btrfs_next_leaf(root, &path);
10439                         if (ret != 0)
10440                                 break;
10441                         leaf = path.nodes[0];
10442                         slot = path.slots[0];
10443                 }
10444                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10445                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10446                         unsigned long offset;
10447                         u64 last_snapshot;
10448
10449                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10450                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10451                         last_snapshot = btrfs_root_last_snapshot(&ri);
10452                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10453                                 level = btrfs_root_level(&ri);
10454                                 ret = add_root_item_to_list(&normal_trees,
10455                                                 found_key.objectid,
10456                                                 btrfs_root_bytenr(&ri),
10457                                                 last_snapshot, level,
10458                                                 0, NULL);
10459                                 if (ret < 0)
10460                                         goto out;
10461                         } else {
10462                                 level = btrfs_root_level(&ri);
10463                                 objectid = found_key.objectid;
10464                                 btrfs_disk_key_to_cpu(&found_key,
10465                                                       &ri.drop_progress);
10466                                 ret = add_root_item_to_list(&dropping_trees,
10467                                                 objectid,
10468                                                 btrfs_root_bytenr(&ri),
10469                                                 last_snapshot, level,
10470                                                 ri.drop_level, &found_key);
10471                                 if (ret < 0)
10472                                         goto out;
10473                         }
10474                 }
10475                 path.slots[0]++;
10476         }
10477         btrfs_release_path(&path);
10478
10479         /*
10480          * check_block can return -EAGAIN if it fixes something, please keep
10481          * this in mind when dealing with return values from these functions, if
10482          * we get -EAGAIN we want to fall through and restart the loop.
10483          */
10484         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10485                                   &seen, &reada, &nodes, &extent_cache,
10486                                   &chunk_cache, &dev_cache, &block_group_cache,
10487                                   &dev_extent_cache);
10488         if (ret < 0) {
10489                 if (ret == -EAGAIN)
10490                         goto loop;
10491                 goto out;
10492         }
10493         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10494                                   &pending, &seen, &reada, &nodes,
10495                                   &extent_cache, &chunk_cache, &dev_cache,
10496                                   &block_group_cache, &dev_extent_cache);
10497         if (ret < 0) {
10498                 if (ret == -EAGAIN)
10499                         goto loop;
10500                 goto out;
10501         }
10502
10503         ret = check_chunks(&chunk_cache, &block_group_cache,
10504                            &dev_extent_cache, NULL, NULL, NULL, 0);
10505         if (ret) {
10506                 if (ret == -EAGAIN)
10507                         goto loop;
10508                 err = ret;
10509         }
10510
10511         ret = check_extent_refs(root, &extent_cache);
10512         if (ret < 0) {
10513                 if (ret == -EAGAIN)
10514                         goto loop;
10515                 goto out;
10516         }
10517
10518         ret = check_devices(&dev_cache, &dev_extent_cache);
10519         if (ret && err)
10520                 ret = err;
10521
10522 out:
10523         task_stop(ctx.info);
10524         if (repair) {
10525                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10526                 extent_io_tree_cleanup(&excluded_extents);
10527                 fs_info->fsck_extent_cache = NULL;
10528                 fs_info->free_extent_hook = NULL;
10529                 fs_info->corrupt_blocks = NULL;
10530                 fs_info->excluded_extents = NULL;
10531         }
10532         free(bits);
10533         free_chunk_cache_tree(&chunk_cache);
10534         free_device_cache_tree(&dev_cache);
10535         free_block_group_tree(&block_group_cache);
10536         free_device_extent_tree(&dev_extent_cache);
10537         free_extent_cache_tree(&seen);
10538         free_extent_cache_tree(&pending);
10539         free_extent_cache_tree(&reada);
10540         free_extent_cache_tree(&nodes);
10541         free_root_item_list(&normal_trees);
10542         free_root_item_list(&dropping_trees);
10543         return ret;
10544 loop:
10545         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10546         free_extent_cache_tree(&seen);
10547         free_extent_cache_tree(&pending);
10548         free_extent_cache_tree(&reada);
10549         free_extent_cache_tree(&nodes);
10550         free_chunk_cache_tree(&chunk_cache);
10551         free_block_group_tree(&block_group_cache);
10552         free_device_cache_tree(&dev_cache);
10553         free_device_extent_tree(&dev_extent_cache);
10554         free_extent_record_cache(&extent_cache);
10555         free_root_item_list(&normal_trees);
10556         free_root_item_list(&dropping_trees);
10557         extent_io_tree_cleanup(&excluded_extents);
10558         goto again;
10559 }
10560
10561 /*
10562  * Check backrefs of a tree block given by @bytenr or @eb.
10563  *
10564  * @root:       the root containing the @bytenr or @eb
10565  * @eb:         tree block extent buffer, can be NULL
10566  * @bytenr:     bytenr of the tree block to search
10567  * @level:      tree level of the tree block
10568  * @owner:      owner of the tree block
10569  *
10570  * Return >0 for any error found and output error message
10571  * Return 0 for no error found
10572  */
10573 static int check_tree_block_ref(struct btrfs_root *root,
10574                                 struct extent_buffer *eb, u64 bytenr,
10575                                 int level, u64 owner)
10576 {
10577         struct btrfs_key key;
10578         struct btrfs_root *extent_root = root->fs_info->extent_root;
10579         struct btrfs_path path;
10580         struct btrfs_extent_item *ei;
10581         struct btrfs_extent_inline_ref *iref;
10582         struct extent_buffer *leaf;
10583         unsigned long end;
10584         unsigned long ptr;
10585         int slot;
10586         int skinny_level;
10587         int type;
10588         u32 nodesize = root->fs_info->nodesize;
10589         u32 item_size;
10590         u64 offset;
10591         int tree_reloc_root = 0;
10592         int found_ref = 0;
10593         int err = 0;
10594         int ret;
10595
10596         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10597             btrfs_header_bytenr(root->node) == bytenr)
10598                 tree_reloc_root = 1;
10599
10600         btrfs_init_path(&path);
10601         key.objectid = bytenr;
10602         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10603                 key.type = BTRFS_METADATA_ITEM_KEY;
10604         else
10605                 key.type = BTRFS_EXTENT_ITEM_KEY;
10606         key.offset = (u64)-1;
10607
10608         /* Search for the backref in extent tree */
10609         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10610         if (ret < 0) {
10611                 err |= BACKREF_MISSING;
10612                 goto out;
10613         }
10614         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10615         if (ret) {
10616                 err |= BACKREF_MISSING;
10617                 goto out;
10618         }
10619
10620         leaf = path.nodes[0];
10621         slot = path.slots[0];
10622         btrfs_item_key_to_cpu(leaf, &key, slot);
10623
10624         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10625
10626         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10627                 skinny_level = (int)key.offset;
10628                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10629         } else {
10630                 struct btrfs_tree_block_info *info;
10631
10632                 info = (struct btrfs_tree_block_info *)(ei + 1);
10633                 skinny_level = btrfs_tree_block_level(leaf, info);
10634                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10635         }
10636
10637         if (eb) {
10638                 u64 header_gen;
10639                 u64 extent_gen;
10640
10641                 if (!(btrfs_extent_flags(leaf, ei) &
10642                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10643                         error(
10644                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10645                                 key.objectid, nodesize,
10646                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10647                         err = BACKREF_MISMATCH;
10648                 }
10649                 header_gen = btrfs_header_generation(eb);
10650                 extent_gen = btrfs_extent_generation(leaf, ei);
10651                 if (header_gen != extent_gen) {
10652                         error(
10653         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10654                                 key.objectid, nodesize, header_gen,
10655                                 extent_gen);
10656                         err = BACKREF_MISMATCH;
10657                 }
10658                 if (level != skinny_level) {
10659                         error(
10660                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10661                                 key.objectid, nodesize, level, skinny_level);
10662                         err = BACKREF_MISMATCH;
10663                 }
10664                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10665                         error(
10666                         "extent[%llu %u] is referred by other roots than %llu",
10667                                 key.objectid, nodesize, root->objectid);
10668                         err = BACKREF_MISMATCH;
10669                 }
10670         }
10671
10672         /*
10673          * Iterate the extent/metadata item to find the exact backref
10674          */
10675         item_size = btrfs_item_size_nr(leaf, slot);
10676         ptr = (unsigned long)iref;
10677         end = (unsigned long)ei + item_size;
10678         while (ptr < end) {
10679                 iref = (struct btrfs_extent_inline_ref *)ptr;
10680                 type = btrfs_extent_inline_ref_type(leaf, iref);
10681                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10682
10683                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10684                         (offset == root->objectid || offset == owner)) {
10685                         found_ref = 1;
10686                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10687                         /*
10688                          * Backref of tree reloc root points to itself, no need
10689                          * to check backref any more.
10690                          */
10691                         if (tree_reloc_root)
10692                                 found_ref = 1;
10693                         else
10694                         /* Check if the backref points to valid referencer */
10695                                 found_ref = !check_tree_block_ref(root, NULL,
10696                                                 offset, level + 1, owner);
10697                 }
10698
10699                 if (found_ref)
10700                         break;
10701                 ptr += btrfs_extent_inline_ref_size(type);
10702         }
10703
10704         /*
10705          * Inlined extent item doesn't have what we need, check
10706          * TREE_BLOCK_REF_KEY
10707          */
10708         if (!found_ref) {
10709                 btrfs_release_path(&path);
10710                 key.objectid = bytenr;
10711                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10712                 key.offset = root->objectid;
10713
10714                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10715                 if (!ret)
10716                         found_ref = 1;
10717         }
10718         if (!found_ref)
10719                 err |= BACKREF_MISSING;
10720 out:
10721         btrfs_release_path(&path);
10722         if (eb && (err & BACKREF_MISSING))
10723                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10724                         bytenr, nodesize, owner, level);
10725         return err;
10726 }
10727
10728 /*
10729  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10730  *
10731  * Return >0 any error found and output error message
10732  * Return 0 for no error found
10733  */
10734 static int check_extent_data_item(struct btrfs_root *root,
10735                                   struct extent_buffer *eb, int slot)
10736 {
10737         struct btrfs_file_extent_item *fi;
10738         struct btrfs_path path;
10739         struct btrfs_root *extent_root = root->fs_info->extent_root;
10740         struct btrfs_key fi_key;
10741         struct btrfs_key dbref_key;
10742         struct extent_buffer *leaf;
10743         struct btrfs_extent_item *ei;
10744         struct btrfs_extent_inline_ref *iref;
10745         struct btrfs_extent_data_ref *dref;
10746         u64 owner;
10747         u64 disk_bytenr;
10748         u64 disk_num_bytes;
10749         u64 extent_num_bytes;
10750         u64 extent_flags;
10751         u32 item_size;
10752         unsigned long end;
10753         unsigned long ptr;
10754         int type;
10755         u64 ref_root;
10756         int found_dbackref = 0;
10757         int err = 0;
10758         int ret;
10759
10760         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10761         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10762
10763         /* Nothing to check for hole and inline data extents */
10764         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10765             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10766                 return 0;
10767
10768         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10769         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10770         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10771
10772         /* Check unaligned disk_num_bytes and num_bytes */
10773         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10774                 error(
10775 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10776                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10777                         root->fs_info->sectorsize);
10778                 err |= BYTES_UNALIGNED;
10779         } else {
10780                 data_bytes_allocated += disk_num_bytes;
10781         }
10782         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10783                 error(
10784 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10785                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10786                         root->fs_info->sectorsize);
10787                 err |= BYTES_UNALIGNED;
10788         } else {
10789                 data_bytes_referenced += extent_num_bytes;
10790         }
10791         owner = btrfs_header_owner(eb);
10792
10793         /* Check the extent item of the file extent in extent tree */
10794         btrfs_init_path(&path);
10795         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10796         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10797         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10798
10799         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10800         if (ret)
10801                 goto out;
10802
10803         leaf = path.nodes[0];
10804         slot = path.slots[0];
10805         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10806
10807         extent_flags = btrfs_extent_flags(leaf, ei);
10808
10809         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10810                 error(
10811                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10812                     disk_bytenr, disk_num_bytes,
10813                     BTRFS_EXTENT_FLAG_DATA);
10814                 err |= BACKREF_MISMATCH;
10815         }
10816
10817         /* Check data backref inside that extent item */
10818         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10819         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10820         ptr = (unsigned long)iref;
10821         end = (unsigned long)ei + item_size;
10822         while (ptr < end) {
10823                 iref = (struct btrfs_extent_inline_ref *)ptr;
10824                 type = btrfs_extent_inline_ref_type(leaf, iref);
10825                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10826
10827                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10828                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10829                         if (ref_root == owner || ref_root == root->objectid)
10830                                 found_dbackref = 1;
10831                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10832                         found_dbackref = !check_tree_block_ref(root, NULL,
10833                                 btrfs_extent_inline_ref_offset(leaf, iref),
10834                                 0, owner);
10835                 }
10836
10837                 if (found_dbackref)
10838                         break;
10839                 ptr += btrfs_extent_inline_ref_size(type);
10840         }
10841
10842         if (!found_dbackref) {
10843                 btrfs_release_path(&path);
10844
10845                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10846                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10847                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10848                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10849                                 fi_key.objectid, fi_key.offset);
10850
10851                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10852                                         &dbref_key, &path, 0, 0);
10853                 if (!ret) {
10854                         found_dbackref = 1;
10855                         goto out;
10856                 }
10857
10858                 btrfs_release_path(&path);
10859
10860                 /*
10861                  * Neither inlined nor EXTENT_DATA_REF found, try
10862                  * SHARED_DATA_REF as last chance.
10863                  */
10864                 dbref_key.objectid = disk_bytenr;
10865                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10866                 dbref_key.offset = eb->start;
10867
10868                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10869                                         &dbref_key, &path, 0, 0);
10870                 if (!ret) {
10871                         found_dbackref = 1;
10872                         goto out;
10873                 }
10874         }
10875
10876 out:
10877         if (!found_dbackref)
10878                 err |= BACKREF_MISSING;
10879         btrfs_release_path(&path);
10880         if (err & BACKREF_MISSING) {
10881                 error("data extent[%llu %llu] backref lost",
10882                       disk_bytenr, disk_num_bytes);
10883         }
10884         return err;
10885 }
10886
10887 /*
10888  * Get real tree block level for the case like shared block
10889  * Return >= 0 as tree level
10890  * Return <0 for error
10891  */
10892 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10893 {
10894         struct extent_buffer *eb;
10895         struct btrfs_path path;
10896         struct btrfs_key key;
10897         struct btrfs_extent_item *ei;
10898         u64 flags;
10899         u64 transid;
10900         u8 backref_level;
10901         u8 header_level;
10902         int ret;
10903
10904         /* Search extent tree for extent generation and level */
10905         key.objectid = bytenr;
10906         key.type = BTRFS_METADATA_ITEM_KEY;
10907         key.offset = (u64)-1;
10908
10909         btrfs_init_path(&path);
10910         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10911         if (ret < 0)
10912                 goto release_out;
10913         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10914         if (ret < 0)
10915                 goto release_out;
10916         if (ret > 0) {
10917                 ret = -ENOENT;
10918                 goto release_out;
10919         }
10920
10921         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10922         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10923                             struct btrfs_extent_item);
10924         flags = btrfs_extent_flags(path.nodes[0], ei);
10925         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10926                 ret = -ENOENT;
10927                 goto release_out;
10928         }
10929
10930         /* Get transid for later read_tree_block() check */
10931         transid = btrfs_extent_generation(path.nodes[0], ei);
10932
10933         /* Get backref level as one source */
10934         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10935                 backref_level = key.offset;
10936         } else {
10937                 struct btrfs_tree_block_info *info;
10938
10939                 info = (struct btrfs_tree_block_info *)(ei + 1);
10940                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10941         }
10942         btrfs_release_path(&path);
10943
10944         /* Get level from tree block as an alternative source */
10945         eb = read_tree_block(fs_info, bytenr, transid);
10946         if (!extent_buffer_uptodate(eb)) {
10947                 free_extent_buffer(eb);
10948                 return -EIO;
10949         }
10950         header_level = btrfs_header_level(eb);
10951         free_extent_buffer(eb);
10952
10953         if (header_level != backref_level)
10954                 return -EIO;
10955         return header_level;
10956
10957 release_out:
10958         btrfs_release_path(&path);
10959         return ret;
10960 }
10961
10962 /*
10963  * Check if a tree block backref is valid (points to a valid tree block)
10964  * if level == -1, level will be resolved
10965  * Return >0 for any error found and print error message
10966  */
10967 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10968                                     u64 bytenr, int level)
10969 {
10970         struct btrfs_root *root;
10971         struct btrfs_key key;
10972         struct btrfs_path path;
10973         struct extent_buffer *eb;
10974         struct extent_buffer *node;
10975         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10976         int err = 0;
10977         int ret;
10978
10979         /* Query level for level == -1 special case */
10980         if (level == -1)
10981                 level = query_tree_block_level(fs_info, bytenr);
10982         if (level < 0) {
10983                 err |= REFERENCER_MISSING;
10984                 goto out;
10985         }
10986
10987         key.objectid = root_id;
10988         key.type = BTRFS_ROOT_ITEM_KEY;
10989         key.offset = (u64)-1;
10990
10991         root = btrfs_read_fs_root(fs_info, &key);
10992         if (IS_ERR(root)) {
10993                 err |= REFERENCER_MISSING;
10994                 goto out;
10995         }
10996
10997         /* Read out the tree block to get item/node key */
10998         eb = read_tree_block(fs_info, bytenr, 0);
10999         if (!extent_buffer_uptodate(eb)) {
11000                 err |= REFERENCER_MISSING;
11001                 free_extent_buffer(eb);
11002                 goto out;
11003         }
11004
11005         /* Empty tree, no need to check key */
11006         if (!btrfs_header_nritems(eb) && !level) {
11007                 free_extent_buffer(eb);
11008                 goto out;
11009         }
11010
11011         if (level)
11012                 btrfs_node_key_to_cpu(eb, &key, 0);
11013         else
11014                 btrfs_item_key_to_cpu(eb, &key, 0);
11015
11016         free_extent_buffer(eb);
11017
11018         btrfs_init_path(&path);
11019         path.lowest_level = level;
11020         /* Search with the first key, to ensure we can reach it */
11021         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11022         if (ret < 0) {
11023                 err |= REFERENCER_MISSING;
11024                 goto release_out;
11025         }
11026
11027         node = path.nodes[level];
11028         if (btrfs_header_bytenr(node) != bytenr) {
11029                 error(
11030         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11031                         bytenr, nodesize, bytenr,
11032                         btrfs_header_bytenr(node));
11033                 err |= REFERENCER_MISMATCH;
11034         }
11035         if (btrfs_header_level(node) != level) {
11036                 error(
11037         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11038                         bytenr, nodesize, level,
11039                         btrfs_header_level(node));
11040                 err |= REFERENCER_MISMATCH;
11041         }
11042
11043 release_out:
11044         btrfs_release_path(&path);
11045 out:
11046         if (err & REFERENCER_MISSING) {
11047                 if (level < 0)
11048                         error("extent [%llu %d] lost referencer (owner: %llu)",
11049                                 bytenr, nodesize, root_id);
11050                 else
11051                         error(
11052                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11053                                 bytenr, nodesize, root_id, level);
11054         }
11055
11056         return err;
11057 }
11058
11059 /*
11060  * Check if tree block @eb is tree reloc root.
11061  * Return 0 if it's not or any problem happens
11062  * Return 1 if it's a tree reloc root
11063  */
11064 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11065                                  struct extent_buffer *eb)
11066 {
11067         struct btrfs_root *tree_reloc_root;
11068         struct btrfs_key key;
11069         u64 bytenr = btrfs_header_bytenr(eb);
11070         u64 owner = btrfs_header_owner(eb);
11071         int ret = 0;
11072
11073         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11074         key.offset = owner;
11075         key.type = BTRFS_ROOT_ITEM_KEY;
11076
11077         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11078         if (IS_ERR(tree_reloc_root))
11079                 return 0;
11080
11081         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11082                 ret = 1;
11083         btrfs_free_fs_root(tree_reloc_root);
11084         return ret;
11085 }
11086
11087 /*
11088  * Check referencer for shared block backref
11089  * If level == -1, this function will resolve the level.
11090  */
11091 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11092                                      u64 parent, u64 bytenr, int level)
11093 {
11094         struct extent_buffer *eb;
11095         u32 nr;
11096         int found_parent = 0;
11097         int i;
11098
11099         eb = read_tree_block(fs_info, parent, 0);
11100         if (!extent_buffer_uptodate(eb))
11101                 goto out;
11102
11103         if (level == -1)
11104                 level = query_tree_block_level(fs_info, bytenr);
11105         if (level < 0)
11106                 goto out;
11107
11108         /* It's possible it's a tree reloc root */
11109         if (parent == bytenr) {
11110                 if (is_tree_reloc_root(fs_info, eb))
11111                         found_parent = 1;
11112                 goto out;
11113         }
11114
11115         if (level + 1 != btrfs_header_level(eb))
11116                 goto out;
11117
11118         nr = btrfs_header_nritems(eb);
11119         for (i = 0; i < nr; i++) {
11120                 if (bytenr == btrfs_node_blockptr(eb, i)) {
11121                         found_parent = 1;
11122                         break;
11123                 }
11124         }
11125 out:
11126         free_extent_buffer(eb);
11127         if (!found_parent) {
11128                 error(
11129         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11130                         bytenr, fs_info->nodesize, parent, level);
11131                 return REFERENCER_MISSING;
11132         }
11133         return 0;
11134 }
11135
11136 /*
11137  * Check referencer for normal (inlined) data ref
11138  * If len == 0, it will be resolved by searching in extent tree
11139  */
11140 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11141                                      u64 root_id, u64 objectid, u64 offset,
11142                                      u64 bytenr, u64 len, u32 count)
11143 {
11144         struct btrfs_root *root;
11145         struct btrfs_root *extent_root = fs_info->extent_root;
11146         struct btrfs_key key;
11147         struct btrfs_path path;
11148         struct extent_buffer *leaf;
11149         struct btrfs_file_extent_item *fi;
11150         u32 found_count = 0;
11151         int slot;
11152         int ret = 0;
11153
11154         if (!len) {
11155                 key.objectid = bytenr;
11156                 key.type = BTRFS_EXTENT_ITEM_KEY;
11157                 key.offset = (u64)-1;
11158
11159                 btrfs_init_path(&path);
11160                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11161                 if (ret < 0)
11162                         goto out;
11163                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11164                 if (ret)
11165                         goto out;
11166                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11167                 if (key.objectid != bytenr ||
11168                     key.type != BTRFS_EXTENT_ITEM_KEY)
11169                         goto out;
11170                 len = key.offset;
11171                 btrfs_release_path(&path);
11172         }
11173         key.objectid = root_id;
11174         key.type = BTRFS_ROOT_ITEM_KEY;
11175         key.offset = (u64)-1;
11176         btrfs_init_path(&path);
11177
11178         root = btrfs_read_fs_root(fs_info, &key);
11179         if (IS_ERR(root))
11180                 goto out;
11181
11182         key.objectid = objectid;
11183         key.type = BTRFS_EXTENT_DATA_KEY;
11184         /*
11185          * It can be nasty as data backref offset is
11186          * file offset - file extent offset, which is smaller or
11187          * equal to original backref offset.  The only special case is
11188          * overflow.  So we need to special check and do further search.
11189          */
11190         key.offset = offset & (1ULL << 63) ? 0 : offset;
11191
11192         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11193         if (ret < 0)
11194                 goto out;
11195
11196         /*
11197          * Search afterwards to get correct one
11198          * NOTE: As we must do a comprehensive check on the data backref to
11199          * make sure the dref count also matches, we must iterate all file
11200          * extents for that inode.
11201          */
11202         while (1) {
11203                 leaf = path.nodes[0];
11204                 slot = path.slots[0];
11205
11206                 if (slot >= btrfs_header_nritems(leaf))
11207                         goto next;
11208                 btrfs_item_key_to_cpu(leaf, &key, slot);
11209                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11210                         break;
11211                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11212                 /*
11213                  * Except normal disk bytenr and disk num bytes, we still
11214                  * need to do extra check on dbackref offset as
11215                  * dbackref offset = file_offset - file_extent_offset
11216                  */
11217                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11218                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11219                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11220                     offset)
11221                         found_count++;
11222
11223 next:
11224                 ret = btrfs_next_item(root, &path);
11225                 if (ret)
11226                         break;
11227         }
11228 out:
11229         btrfs_release_path(&path);
11230         if (found_count != count) {
11231                 error(
11232 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11233                         bytenr, len, root_id, objectid, offset, count, found_count);
11234                 return REFERENCER_MISSING;
11235         }
11236         return 0;
11237 }
11238
11239 /*
11240  * Check if the referencer of a shared data backref exists
11241  */
11242 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11243                                      u64 parent, u64 bytenr)
11244 {
11245         struct extent_buffer *eb;
11246         struct btrfs_key key;
11247         struct btrfs_file_extent_item *fi;
11248         u32 nr;
11249         int found_parent = 0;
11250         int i;
11251
11252         eb = read_tree_block(fs_info, parent, 0);
11253         if (!extent_buffer_uptodate(eb))
11254                 goto out;
11255
11256         nr = btrfs_header_nritems(eb);
11257         for (i = 0; i < nr; i++) {
11258                 btrfs_item_key_to_cpu(eb, &key, i);
11259                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11260                         continue;
11261
11262                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11263                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11264                         continue;
11265
11266                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11267                         found_parent = 1;
11268                         break;
11269                 }
11270         }
11271
11272 out:
11273         free_extent_buffer(eb);
11274         if (!found_parent) {
11275                 error("shared extent %llu referencer lost (parent: %llu)",
11276                         bytenr, parent);
11277                 return REFERENCER_MISSING;
11278         }
11279         return 0;
11280 }
11281
11282 /*
11283  * This function will check a given extent item, including its backref and
11284  * itself (like crossing stripe boundary and type)
11285  *
11286  * Since we don't use extent_record anymore, introduce new error bit
11287  */
11288 static int check_extent_item(struct btrfs_fs_info *fs_info,
11289                              struct extent_buffer *eb, int slot)
11290 {
11291         struct btrfs_extent_item *ei;
11292         struct btrfs_extent_inline_ref *iref;
11293         struct btrfs_extent_data_ref *dref;
11294         unsigned long end;
11295         unsigned long ptr;
11296         int type;
11297         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11298         u32 item_size = btrfs_item_size_nr(eb, slot);
11299         u64 flags;
11300         u64 offset;
11301         int metadata = 0;
11302         int level;
11303         struct btrfs_key key;
11304         int ret;
11305         int err = 0;
11306
11307         btrfs_item_key_to_cpu(eb, &key, slot);
11308         if (key.type == BTRFS_EXTENT_ITEM_KEY)
11309                 bytes_used += key.offset;
11310         else
11311                 bytes_used += nodesize;
11312
11313         if (item_size < sizeof(*ei)) {
11314                 /*
11315                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11316                  * old thing when on disk format is still un-determined.
11317                  * No need to care about it anymore
11318                  */
11319                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11320                 return -ENOTTY;
11321         }
11322
11323         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11324         flags = btrfs_extent_flags(eb, ei);
11325
11326         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11327                 metadata = 1;
11328         if (metadata && check_crossing_stripes(global_info, key.objectid,
11329                                                eb->len)) {
11330                 error("bad metadata [%llu, %llu) crossing stripe boundary",
11331                       key.objectid, key.objectid + nodesize);
11332                 err |= CROSSING_STRIPE_BOUNDARY;
11333         }
11334
11335         ptr = (unsigned long)(ei + 1);
11336
11337         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11338                 /* Old EXTENT_ITEM metadata */
11339                 struct btrfs_tree_block_info *info;
11340
11341                 info = (struct btrfs_tree_block_info *)ptr;
11342                 level = btrfs_tree_block_level(eb, info);
11343                 ptr += sizeof(struct btrfs_tree_block_info);
11344         } else {
11345                 /* New METADATA_ITEM */
11346                 level = key.offset;
11347         }
11348         end = (unsigned long)ei + item_size;
11349
11350 next:
11351         /* Reached extent item end normally */
11352         if (ptr == end)
11353                 goto out;
11354
11355         /* Beyond extent item end, wrong item size */
11356         if (ptr > end) {
11357                 err |= ITEM_SIZE_MISMATCH;
11358                 error("extent item at bytenr %llu slot %d has wrong size",
11359                         eb->start, slot);
11360                 goto out;
11361         }
11362
11363         /* Now check every backref in this extent item */
11364         iref = (struct btrfs_extent_inline_ref *)ptr;
11365         type = btrfs_extent_inline_ref_type(eb, iref);
11366         offset = btrfs_extent_inline_ref_offset(eb, iref);
11367         switch (type) {
11368         case BTRFS_TREE_BLOCK_REF_KEY:
11369                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11370                                                level);
11371                 err |= ret;
11372                 break;
11373         case BTRFS_SHARED_BLOCK_REF_KEY:
11374                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11375                                                  level);
11376                 err |= ret;
11377                 break;
11378         case BTRFS_EXTENT_DATA_REF_KEY:
11379                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11380                 ret = check_extent_data_backref(fs_info,
11381                                 btrfs_extent_data_ref_root(eb, dref),
11382                                 btrfs_extent_data_ref_objectid(eb, dref),
11383                                 btrfs_extent_data_ref_offset(eb, dref),
11384                                 key.objectid, key.offset,
11385                                 btrfs_extent_data_ref_count(eb, dref));
11386                 err |= ret;
11387                 break;
11388         case BTRFS_SHARED_DATA_REF_KEY:
11389                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11390                 err |= ret;
11391                 break;
11392         default:
11393                 error("extent[%llu %d %llu] has unknown ref type: %d",
11394                         key.objectid, key.type, key.offset, type);
11395                 err |= UNKNOWN_TYPE;
11396                 goto out;
11397         }
11398
11399         ptr += btrfs_extent_inline_ref_size(type);
11400         goto next;
11401
11402 out:
11403         return err;
11404 }
11405
11406 /*
11407  * Check if a dev extent item is referred correctly by its chunk
11408  */
11409 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11410                                  struct extent_buffer *eb, int slot)
11411 {
11412         struct btrfs_root *chunk_root = fs_info->chunk_root;
11413         struct btrfs_dev_extent *ptr;
11414         struct btrfs_path path;
11415         struct btrfs_key chunk_key;
11416         struct btrfs_key devext_key;
11417         struct btrfs_chunk *chunk;
11418         struct extent_buffer *l;
11419         int num_stripes;
11420         u64 length;
11421         int i;
11422         int found_chunk = 0;
11423         int ret;
11424
11425         btrfs_item_key_to_cpu(eb, &devext_key, slot);
11426         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11427         length = btrfs_dev_extent_length(eb, ptr);
11428
11429         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11430         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11431         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11432
11433         btrfs_init_path(&path);
11434         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11435         if (ret)
11436                 goto out;
11437
11438         l = path.nodes[0];
11439         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11440         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11441                                       chunk_key.offset);
11442         if (ret < 0)
11443                 goto out;
11444
11445         if (btrfs_stripe_length(fs_info, l, chunk) != length)
11446                 goto out;
11447
11448         num_stripes = btrfs_chunk_num_stripes(l, chunk);
11449         for (i = 0; i < num_stripes; i++) {
11450                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11451                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11452
11453                 if (devid == devext_key.objectid &&
11454                     offset == devext_key.offset) {
11455                         found_chunk = 1;
11456                         break;
11457                 }
11458         }
11459 out:
11460         btrfs_release_path(&path);
11461         if (!found_chunk) {
11462                 error(
11463                 "device extent[%llu, %llu, %llu] did not find the related chunk",
11464                         devext_key.objectid, devext_key.offset, length);
11465                 return REFERENCER_MISSING;
11466         }
11467         return 0;
11468 }
11469
11470 /*
11471  * Check if the used space is correct with the dev item
11472  */
11473 static int check_dev_item(struct btrfs_fs_info *fs_info,
11474                           struct extent_buffer *eb, int slot)
11475 {
11476         struct btrfs_root *dev_root = fs_info->dev_root;
11477         struct btrfs_dev_item *dev_item;
11478         struct btrfs_path path;
11479         struct btrfs_key key;
11480         struct btrfs_dev_extent *ptr;
11481         u64 dev_id;
11482         u64 used;
11483         u64 total = 0;
11484         int ret;
11485
11486         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11487         dev_id = btrfs_device_id(eb, dev_item);
11488         used = btrfs_device_bytes_used(eb, dev_item);
11489
11490         key.objectid = dev_id;
11491         key.type = BTRFS_DEV_EXTENT_KEY;
11492         key.offset = 0;
11493
11494         btrfs_init_path(&path);
11495         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11496         if (ret < 0) {
11497                 btrfs_item_key_to_cpu(eb, &key, slot);
11498                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11499                         key.objectid, key.type, key.offset);
11500                 btrfs_release_path(&path);
11501                 return REFERENCER_MISSING;
11502         }
11503
11504         /* Iterate dev_extents to calculate the used space of a device */
11505         while (1) {
11506                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11507                         goto next;
11508
11509                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11510                 if (key.objectid > dev_id)
11511                         break;
11512                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11513                         goto next;
11514
11515                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11516                                      struct btrfs_dev_extent);
11517                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11518 next:
11519                 ret = btrfs_next_item(dev_root, &path);
11520                 if (ret)
11521                         break;
11522         }
11523         btrfs_release_path(&path);
11524
11525         if (used != total) {
11526                 btrfs_item_key_to_cpu(eb, &key, slot);
11527                 error(
11528 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11529                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11530                         BTRFS_DEV_EXTENT_KEY, dev_id);
11531                 return ACCOUNTING_MISMATCH;
11532         }
11533         return 0;
11534 }
11535
11536 /*
11537  * Check a block group item with its referener (chunk) and its used space
11538  * with extent/metadata item
11539  */
11540 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11541                                   struct extent_buffer *eb, int slot)
11542 {
11543         struct btrfs_root *extent_root = fs_info->extent_root;
11544         struct btrfs_root *chunk_root = fs_info->chunk_root;
11545         struct btrfs_block_group_item *bi;
11546         struct btrfs_block_group_item bg_item;
11547         struct btrfs_path path;
11548         struct btrfs_key bg_key;
11549         struct btrfs_key chunk_key;
11550         struct btrfs_key extent_key;
11551         struct btrfs_chunk *chunk;
11552         struct extent_buffer *leaf;
11553         struct btrfs_extent_item *ei;
11554         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11555         u64 flags;
11556         u64 bg_flags;
11557         u64 used;
11558         u64 total = 0;
11559         int ret;
11560         int err = 0;
11561
11562         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11563         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11564         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11565         used = btrfs_block_group_used(&bg_item);
11566         bg_flags = btrfs_block_group_flags(&bg_item);
11567
11568         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11569         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11570         chunk_key.offset = bg_key.objectid;
11571
11572         btrfs_init_path(&path);
11573         /* Search for the referencer chunk */
11574         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11575         if (ret) {
11576                 error(
11577                 "block group[%llu %llu] did not find the related chunk item",
11578                         bg_key.objectid, bg_key.offset);
11579                 err |= REFERENCER_MISSING;
11580         } else {
11581                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11582                                         struct btrfs_chunk);
11583                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11584                                                 bg_key.offset) {
11585                         error(
11586         "block group[%llu %llu] related chunk item length does not match",
11587                                 bg_key.objectid, bg_key.offset);
11588                         err |= REFERENCER_MISMATCH;
11589                 }
11590         }
11591         btrfs_release_path(&path);
11592
11593         /* Search from the block group bytenr */
11594         extent_key.objectid = bg_key.objectid;
11595         extent_key.type = 0;
11596         extent_key.offset = 0;
11597
11598         btrfs_init_path(&path);
11599         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11600         if (ret < 0)
11601                 goto out;
11602
11603         /* Iterate extent tree to account used space */
11604         while (1) {
11605                 leaf = path.nodes[0];
11606
11607                 /* Search slot can point to the last item beyond leaf nritems */
11608                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11609                         goto next;
11610
11611                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11612                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11613                         break;
11614
11615                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11616                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11617                         goto next;
11618                 if (extent_key.objectid < bg_key.objectid)
11619                         goto next;
11620
11621                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11622                         total += nodesize;
11623                 else
11624                         total += extent_key.offset;
11625
11626                 ei = btrfs_item_ptr(leaf, path.slots[0],
11627                                     struct btrfs_extent_item);
11628                 flags = btrfs_extent_flags(leaf, ei);
11629                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11630                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11631                                 error(
11632                         "bad extent[%llu, %llu) type mismatch with chunk",
11633                                         extent_key.objectid,
11634                                         extent_key.objectid + extent_key.offset);
11635                                 err |= CHUNK_TYPE_MISMATCH;
11636                         }
11637                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11638                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11639                                     BTRFS_BLOCK_GROUP_METADATA))) {
11640                                 error(
11641                         "bad extent[%llu, %llu) type mismatch with chunk",
11642                                         extent_key.objectid,
11643                                         extent_key.objectid + nodesize);
11644                                 err |= CHUNK_TYPE_MISMATCH;
11645                         }
11646                 }
11647 next:
11648                 ret = btrfs_next_item(extent_root, &path);
11649                 if (ret)
11650                         break;
11651         }
11652
11653 out:
11654         btrfs_release_path(&path);
11655
11656         if (total != used) {
11657                 error(
11658                 "block group[%llu %llu] used %llu but extent items used %llu",
11659                         bg_key.objectid, bg_key.offset, used, total);
11660                 err |= ACCOUNTING_MISMATCH;
11661         }
11662         return err;
11663 }
11664
11665 /*
11666  * Check a chunk item.
11667  * Including checking all referred dev_extents and block group
11668  */
11669 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11670                             struct extent_buffer *eb, int slot)
11671 {
11672         struct btrfs_root *extent_root = fs_info->extent_root;
11673         struct btrfs_root *dev_root = fs_info->dev_root;
11674         struct btrfs_path path;
11675         struct btrfs_key chunk_key;
11676         struct btrfs_key bg_key;
11677         struct btrfs_key devext_key;
11678         struct btrfs_chunk *chunk;
11679         struct extent_buffer *leaf;
11680         struct btrfs_block_group_item *bi;
11681         struct btrfs_block_group_item bg_item;
11682         struct btrfs_dev_extent *ptr;
11683         u64 length;
11684         u64 chunk_end;
11685         u64 stripe_len;
11686         u64 type;
11687         int num_stripes;
11688         u64 offset;
11689         u64 objectid;
11690         int i;
11691         int ret;
11692         int err = 0;
11693
11694         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11695         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11696         length = btrfs_chunk_length(eb, chunk);
11697         chunk_end = chunk_key.offset + length;
11698         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11699                                       chunk_key.offset);
11700         if (ret < 0) {
11701                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11702                         chunk_end);
11703                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11704                 goto out;
11705         }
11706         type = btrfs_chunk_type(eb, chunk);
11707
11708         bg_key.objectid = chunk_key.offset;
11709         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11710         bg_key.offset = length;
11711
11712         btrfs_init_path(&path);
11713         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11714         if (ret) {
11715                 error(
11716                 "chunk[%llu %llu) did not find the related block group item",
11717                         chunk_key.offset, chunk_end);
11718                 err |= REFERENCER_MISSING;
11719         } else{
11720                 leaf = path.nodes[0];
11721                 bi = btrfs_item_ptr(leaf, path.slots[0],
11722                                     struct btrfs_block_group_item);
11723                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11724                                    sizeof(bg_item));
11725                 if (btrfs_block_group_flags(&bg_item) != type) {
11726                         error(
11727 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11728                                 chunk_key.offset, chunk_end, type,
11729                                 btrfs_block_group_flags(&bg_item));
11730                         err |= REFERENCER_MISSING;
11731                 }
11732         }
11733
11734         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11735         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11736         for (i = 0; i < num_stripes; i++) {
11737                 btrfs_release_path(&path);
11738                 btrfs_init_path(&path);
11739                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11740                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11741                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11742
11743                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11744                                         0, 0);
11745                 if (ret)
11746                         goto not_match_dev;
11747
11748                 leaf = path.nodes[0];
11749                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11750                                      struct btrfs_dev_extent);
11751                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11752                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11753                 if (objectid != chunk_key.objectid ||
11754                     offset != chunk_key.offset ||
11755                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11756                         goto not_match_dev;
11757                 continue;
11758 not_match_dev:
11759                 err |= BACKREF_MISSING;
11760                 error(
11761                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11762                         chunk_key.objectid, chunk_end, i);
11763                 continue;
11764         }
11765         btrfs_release_path(&path);
11766 out:
11767         return err;
11768 }
11769
11770 /*
11771  * Main entry function to check known items and update related accounting info
11772  */
11773 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11774 {
11775         struct btrfs_fs_info *fs_info = root->fs_info;
11776         struct btrfs_key key;
11777         int slot = 0;
11778         int type;
11779         struct btrfs_extent_data_ref *dref;
11780         int ret;
11781         int err = 0;
11782
11783 next:
11784         btrfs_item_key_to_cpu(eb, &key, slot);
11785         type = key.type;
11786
11787         switch (type) {
11788         case BTRFS_EXTENT_DATA_KEY:
11789                 ret = check_extent_data_item(root, eb, slot);
11790                 err |= ret;
11791                 break;
11792         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11793                 ret = check_block_group_item(fs_info, eb, slot);
11794                 err |= ret;
11795                 break;
11796         case BTRFS_DEV_ITEM_KEY:
11797                 ret = check_dev_item(fs_info, eb, slot);
11798                 err |= ret;
11799                 break;
11800         case BTRFS_CHUNK_ITEM_KEY:
11801                 ret = check_chunk_item(fs_info, eb, slot);
11802                 err |= ret;
11803                 break;
11804         case BTRFS_DEV_EXTENT_KEY:
11805                 ret = check_dev_extent_item(fs_info, eb, slot);
11806                 err |= ret;
11807                 break;
11808         case BTRFS_EXTENT_ITEM_KEY:
11809         case BTRFS_METADATA_ITEM_KEY:
11810                 ret = check_extent_item(fs_info, eb, slot);
11811                 err |= ret;
11812                 break;
11813         case BTRFS_EXTENT_CSUM_KEY:
11814                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11815                 break;
11816         case BTRFS_TREE_BLOCK_REF_KEY:
11817                 ret = check_tree_block_backref(fs_info, key.offset,
11818                                                key.objectid, -1);
11819                 err |= ret;
11820                 break;
11821         case BTRFS_EXTENT_DATA_REF_KEY:
11822                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11823                 ret = check_extent_data_backref(fs_info,
11824                                 btrfs_extent_data_ref_root(eb, dref),
11825                                 btrfs_extent_data_ref_objectid(eb, dref),
11826                                 btrfs_extent_data_ref_offset(eb, dref),
11827                                 key.objectid, 0,
11828                                 btrfs_extent_data_ref_count(eb, dref));
11829                 err |= ret;
11830                 break;
11831         case BTRFS_SHARED_BLOCK_REF_KEY:
11832                 ret = check_shared_block_backref(fs_info, key.offset,
11833                                                  key.objectid, -1);
11834                 err |= ret;
11835                 break;
11836         case BTRFS_SHARED_DATA_REF_KEY:
11837                 ret = check_shared_data_backref(fs_info, key.offset,
11838                                                 key.objectid);
11839                 err |= ret;
11840                 break;
11841         default:
11842                 break;
11843         }
11844
11845         if (++slot < btrfs_header_nritems(eb))
11846                 goto next;
11847
11848         return err;
11849 }
11850
11851 /*
11852  * Helper function for later fs/subvol tree check.  To determine if a tree
11853  * block should be checked.
11854  * This function will ensure only the direct referencer with lowest rootid to
11855  * check a fs/subvolume tree block.
11856  *
11857  * Backref check at extent tree would detect errors like missing subvolume
11858  * tree, so we can do aggressive check to reduce duplicated checks.
11859  */
11860 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11861 {
11862         struct btrfs_root *extent_root = root->fs_info->extent_root;
11863         struct btrfs_key key;
11864         struct btrfs_path path;
11865         struct extent_buffer *leaf;
11866         int slot;
11867         struct btrfs_extent_item *ei;
11868         unsigned long ptr;
11869         unsigned long end;
11870         int type;
11871         u32 item_size;
11872         u64 offset;
11873         struct btrfs_extent_inline_ref *iref;
11874         int ret;
11875
11876         btrfs_init_path(&path);
11877         key.objectid = btrfs_header_bytenr(eb);
11878         key.type = BTRFS_METADATA_ITEM_KEY;
11879         key.offset = (u64)-1;
11880
11881         /*
11882          * Any failure in backref resolving means we can't determine
11883          * whom the tree block belongs to.
11884          * So in that case, we need to check that tree block
11885          */
11886         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11887         if (ret < 0)
11888                 goto need_check;
11889
11890         ret = btrfs_previous_extent_item(extent_root, &path,
11891                                          btrfs_header_bytenr(eb));
11892         if (ret)
11893                 goto need_check;
11894
11895         leaf = path.nodes[0];
11896         slot = path.slots[0];
11897         btrfs_item_key_to_cpu(leaf, &key, slot);
11898         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11899
11900         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11901                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11902         } else {
11903                 struct btrfs_tree_block_info *info;
11904
11905                 info = (struct btrfs_tree_block_info *)(ei + 1);
11906                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11907         }
11908
11909         item_size = btrfs_item_size_nr(leaf, slot);
11910         ptr = (unsigned long)iref;
11911         end = (unsigned long)ei + item_size;
11912         while (ptr < end) {
11913                 iref = (struct btrfs_extent_inline_ref *)ptr;
11914                 type = btrfs_extent_inline_ref_type(leaf, iref);
11915                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11916
11917                 /*
11918                  * We only check the tree block if current root is
11919                  * the lowest referencer of it.
11920                  */
11921                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11922                     offset < root->objectid) {
11923                         btrfs_release_path(&path);
11924                         return 0;
11925                 }
11926
11927                 ptr += btrfs_extent_inline_ref_size(type);
11928         }
11929         /*
11930          * Normally we should also check keyed tree block ref, but that may be
11931          * very time consuming.  Inlined ref should already make us skip a lot
11932          * of refs now.  So skip search keyed tree block ref.
11933          */
11934
11935 need_check:
11936         btrfs_release_path(&path);
11937         return 1;
11938 }
11939
11940 /*
11941  * Traversal function for tree block. We will do:
11942  * 1) Skip shared fs/subvolume tree blocks
11943  * 2) Update related bytes accounting
11944  * 3) Pre-order traversal
11945  */
11946 static int traverse_tree_block(struct btrfs_root *root,
11947                                 struct extent_buffer *node)
11948 {
11949         struct extent_buffer *eb;
11950         struct btrfs_key key;
11951         struct btrfs_key drop_key;
11952         int level;
11953         u64 nr;
11954         int i;
11955         int err = 0;
11956         int ret;
11957
11958         /*
11959          * Skip shared fs/subvolume tree block, in that case they will
11960          * be checked by referencer with lowest rootid
11961          */
11962         if (is_fstree(root->objectid) && !should_check(root, node))
11963                 return 0;
11964
11965         /* Update bytes accounting */
11966         total_btree_bytes += node->len;
11967         if (fs_root_objectid(btrfs_header_owner(node)))
11968                 total_fs_tree_bytes += node->len;
11969         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11970                 total_extent_tree_bytes += node->len;
11971
11972         /* pre-order tranversal, check itself first */
11973         level = btrfs_header_level(node);
11974         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11975                                    btrfs_header_level(node),
11976                                    btrfs_header_owner(node));
11977         err |= ret;
11978         if (err)
11979                 error(
11980         "check %s failed root %llu bytenr %llu level %d, force continue check",
11981                         level ? "node":"leaf", root->objectid,
11982                         btrfs_header_bytenr(node), btrfs_header_level(node));
11983
11984         if (!level) {
11985                 btree_space_waste += btrfs_leaf_free_space(root, node);
11986                 ret = check_leaf_items(root, node);
11987                 err |= ret;
11988                 return err;
11989         }
11990
11991         nr = btrfs_header_nritems(node);
11992         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11993         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11994                 sizeof(struct btrfs_key_ptr);
11995
11996         /* Then check all its children */
11997         for (i = 0; i < nr; i++) {
11998                 u64 blocknr = btrfs_node_blockptr(node, i);
11999
12000                 btrfs_node_key_to_cpu(node, &key, i);
12001                 if (level == root->root_item.drop_level &&
12002                     is_dropped_key(&key, &drop_key))
12003                         continue;
12004
12005                 /*
12006                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
12007                  * to call the function itself.
12008                  */
12009                 eb = read_tree_block(root->fs_info, blocknr, 0);
12010                 if (extent_buffer_uptodate(eb)) {
12011                         ret = traverse_tree_block(root, eb);
12012                         err |= ret;
12013                 }
12014                 free_extent_buffer(eb);
12015         }
12016
12017         return err;
12018 }
12019
12020 /*
12021  * Low memory usage version check_chunks_and_extents.
12022  */
12023 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12024 {
12025         struct btrfs_path path;
12026         struct btrfs_key key;
12027         struct btrfs_root *root1;
12028         struct btrfs_root *root;
12029         struct btrfs_root *cur_root;
12030         int err = 0;
12031         int ret;
12032
12033         root = fs_info->fs_root;
12034
12035         root1 = root->fs_info->chunk_root;
12036         ret = traverse_tree_block(root1, root1->node);
12037         err |= ret;
12038
12039         root1 = root->fs_info->tree_root;
12040         ret = traverse_tree_block(root1, root1->node);
12041         err |= ret;
12042
12043         btrfs_init_path(&path);
12044         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12045         key.offset = 0;
12046         key.type = BTRFS_ROOT_ITEM_KEY;
12047
12048         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12049         if (ret) {
12050                 error("cannot find extent treet in tree_root");
12051                 goto out;
12052         }
12053
12054         while (1) {
12055                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12056                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12057                         goto next;
12058                 key.offset = (u64)-1;
12059
12060                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12061                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12062                                         &key);
12063                 else
12064                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
12065                 if (IS_ERR(cur_root) || !cur_root) {
12066                         error("failed to read tree: %lld", key.objectid);
12067                         goto next;
12068                 }
12069
12070                 ret = traverse_tree_block(cur_root, cur_root->node);
12071                 err |= ret;
12072
12073                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12074                         btrfs_free_fs_root(cur_root);
12075 next:
12076                 ret = btrfs_next_item(root1, &path);
12077                 if (ret)
12078                         goto out;
12079         }
12080
12081 out:
12082         btrfs_release_path(&path);
12083         return err;
12084 }
12085
12086 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12087 {
12088         int ret;
12089
12090         if (!ctx.progress_enabled)
12091                 fprintf(stderr, "checking extents\n");
12092         if (check_mode == CHECK_MODE_LOWMEM)
12093                 ret = check_chunks_and_extents_v2(fs_info);
12094         else
12095                 ret = check_chunks_and_extents(fs_info);
12096
12097         return ret;
12098 }
12099
12100 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12101                            struct btrfs_root *root, int overwrite)
12102 {
12103         struct extent_buffer *c;
12104         struct extent_buffer *old = root->node;
12105         int level;
12106         int ret;
12107         struct btrfs_disk_key disk_key = {0,0,0};
12108
12109         level = 0;
12110
12111         if (overwrite) {
12112                 c = old;
12113                 extent_buffer_get(c);
12114                 goto init;
12115         }
12116         c = btrfs_alloc_free_block(trans, root,
12117                                    root->fs_info->nodesize,
12118                                    root->root_key.objectid,
12119                                    &disk_key, level, 0, 0);
12120         if (IS_ERR(c)) {
12121                 c = old;
12122                 extent_buffer_get(c);
12123                 overwrite = 1;
12124         }
12125 init:
12126         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12127         btrfs_set_header_level(c, level);
12128         btrfs_set_header_bytenr(c, c->start);
12129         btrfs_set_header_generation(c, trans->transid);
12130         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12131         btrfs_set_header_owner(c, root->root_key.objectid);
12132
12133         write_extent_buffer(c, root->fs_info->fsid,
12134                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
12135
12136         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12137                             btrfs_header_chunk_tree_uuid(c),
12138                             BTRFS_UUID_SIZE);
12139
12140         btrfs_mark_buffer_dirty(c);
12141         /*
12142          * this case can happen in the following case:
12143          *
12144          * 1.overwrite previous root.
12145          *
12146          * 2.reinit reloc data root, this is because we skip pin
12147          * down reloc data tree before which means we can allocate
12148          * same block bytenr here.
12149          */
12150         if (old->start == c->start) {
12151                 btrfs_set_root_generation(&root->root_item,
12152                                           trans->transid);
12153                 root->root_item.level = btrfs_header_level(root->node);
12154                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12155                                         &root->root_key, &root->root_item);
12156                 if (ret) {
12157                         free_extent_buffer(c);
12158                         return ret;
12159                 }
12160         }
12161         free_extent_buffer(old);
12162         root->node = c;
12163         add_root_to_dirty_list(root);
12164         return 0;
12165 }
12166
12167 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12168                                 struct extent_buffer *eb, int tree_root)
12169 {
12170         struct extent_buffer *tmp;
12171         struct btrfs_root_item *ri;
12172         struct btrfs_key key;
12173         u64 bytenr;
12174         int level = btrfs_header_level(eb);
12175         int nritems;
12176         int ret;
12177         int i;
12178
12179         /*
12180          * If we have pinned this block before, don't pin it again.
12181          * This can not only avoid forever loop with broken filesystem
12182          * but also give us some speedups.
12183          */
12184         if (test_range_bit(&fs_info->pinned_extents, eb->start,
12185                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12186                 return 0;
12187
12188         btrfs_pin_extent(fs_info, eb->start, eb->len);
12189
12190         nritems = btrfs_header_nritems(eb);
12191         for (i = 0; i < nritems; i++) {
12192                 if (level == 0) {
12193                         btrfs_item_key_to_cpu(eb, &key, i);
12194                         if (key.type != BTRFS_ROOT_ITEM_KEY)
12195                                 continue;
12196                         /* Skip the extent root and reloc roots */
12197                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12198                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12199                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12200                                 continue;
12201                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12202                         bytenr = btrfs_disk_root_bytenr(eb, ri);
12203
12204                         /*
12205                          * If at any point we start needing the real root we
12206                          * will have to build a stump root for the root we are
12207                          * in, but for now this doesn't actually use the root so
12208                          * just pass in extent_root.
12209                          */
12210                         tmp = read_tree_block(fs_info, bytenr, 0);
12211                         if (!extent_buffer_uptodate(tmp)) {
12212                                 fprintf(stderr, "Error reading root block\n");
12213                                 return -EIO;
12214                         }
12215                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
12216                         free_extent_buffer(tmp);
12217                         if (ret)
12218                                 return ret;
12219                 } else {
12220                         bytenr = btrfs_node_blockptr(eb, i);
12221
12222                         /* If we aren't the tree root don't read the block */
12223                         if (level == 1 && !tree_root) {
12224                                 btrfs_pin_extent(fs_info, bytenr,
12225                                                 fs_info->nodesize);
12226                                 continue;
12227                         }
12228
12229                         tmp = read_tree_block(fs_info, bytenr, 0);
12230                         if (!extent_buffer_uptodate(tmp)) {
12231                                 fprintf(stderr, "Error reading tree block\n");
12232                                 return -EIO;
12233                         }
12234                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12235                         free_extent_buffer(tmp);
12236                         if (ret)
12237                                 return ret;
12238                 }
12239         }
12240
12241         return 0;
12242 }
12243
12244 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12245 {
12246         int ret;
12247
12248         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12249         if (ret)
12250                 return ret;
12251
12252         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
12253 }
12254
12255 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12256 {
12257         struct btrfs_block_group_cache *cache;
12258         struct btrfs_path path;
12259         struct extent_buffer *leaf;
12260         struct btrfs_chunk *chunk;
12261         struct btrfs_key key;
12262         int ret;
12263         u64 start;
12264
12265         btrfs_init_path(&path);
12266         key.objectid = 0;
12267         key.type = BTRFS_CHUNK_ITEM_KEY;
12268         key.offset = 0;
12269         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12270         if (ret < 0) {
12271                 btrfs_release_path(&path);
12272                 return ret;
12273         }
12274
12275         /*
12276          * We do this in case the block groups were screwed up and had alloc
12277          * bits that aren't actually set on the chunks.  This happens with
12278          * restored images every time and could happen in real life I guess.
12279          */
12280         fs_info->avail_data_alloc_bits = 0;
12281         fs_info->avail_metadata_alloc_bits = 0;
12282         fs_info->avail_system_alloc_bits = 0;
12283
12284         /* First we need to create the in-memory block groups */
12285         while (1) {
12286                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12287                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12288                         if (ret < 0) {
12289                                 btrfs_release_path(&path);
12290                                 return ret;
12291                         }
12292                         if (ret) {
12293                                 ret = 0;
12294                                 break;
12295                         }
12296                 }
12297                 leaf = path.nodes[0];
12298                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12299                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12300                         path.slots[0]++;
12301                         continue;
12302                 }
12303
12304                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12305                 btrfs_add_block_group(fs_info, 0,
12306                                       btrfs_chunk_type(leaf, chunk),
12307                                       key.objectid, key.offset,
12308                                       btrfs_chunk_length(leaf, chunk));
12309                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12310                                  key.offset + btrfs_chunk_length(leaf, chunk));
12311                 path.slots[0]++;
12312         }
12313         start = 0;
12314         while (1) {
12315                 cache = btrfs_lookup_first_block_group(fs_info, start);
12316                 if (!cache)
12317                         break;
12318                 cache->cached = 1;
12319                 start = cache->key.objectid + cache->key.offset;
12320         }
12321
12322         btrfs_release_path(&path);
12323         return 0;
12324 }
12325
12326 static int reset_balance(struct btrfs_trans_handle *trans,
12327                          struct btrfs_fs_info *fs_info)
12328 {
12329         struct btrfs_root *root = fs_info->tree_root;
12330         struct btrfs_path path;
12331         struct extent_buffer *leaf;
12332         struct btrfs_key key;
12333         int del_slot, del_nr = 0;
12334         int ret;
12335         int found = 0;
12336
12337         btrfs_init_path(&path);
12338         key.objectid = BTRFS_BALANCE_OBJECTID;
12339         key.type = BTRFS_BALANCE_ITEM_KEY;
12340         key.offset = 0;
12341         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12342         if (ret) {
12343                 if (ret > 0)
12344                         ret = 0;
12345                 if (!ret)
12346                         goto reinit_data_reloc;
12347                 else
12348                         goto out;
12349         }
12350
12351         ret = btrfs_del_item(trans, root, &path);
12352         if (ret)
12353                 goto out;
12354         btrfs_release_path(&path);
12355
12356         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12357         key.type = BTRFS_ROOT_ITEM_KEY;
12358         key.offset = 0;
12359         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12360         if (ret < 0)
12361                 goto out;
12362         while (1) {
12363                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12364                         if (!found)
12365                                 break;
12366
12367                         if (del_nr) {
12368                                 ret = btrfs_del_items(trans, root, &path,
12369                                                       del_slot, del_nr);
12370                                 del_nr = 0;
12371                                 if (ret)
12372                                         goto out;
12373                         }
12374                         key.offset++;
12375                         btrfs_release_path(&path);
12376
12377                         found = 0;
12378                         ret = btrfs_search_slot(trans, root, &key, &path,
12379                                                 -1, 1);
12380                         if (ret < 0)
12381                                 goto out;
12382                         continue;
12383                 }
12384                 found = 1;
12385                 leaf = path.nodes[0];
12386                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12387                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12388                         break;
12389                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12390                         path.slots[0]++;
12391                         continue;
12392                 }
12393                 if (!del_nr) {
12394                         del_slot = path.slots[0];
12395                         del_nr = 1;
12396                 } else {
12397                         del_nr++;
12398                 }
12399                 path.slots[0]++;
12400         }
12401
12402         if (del_nr) {
12403                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12404                 if (ret)
12405                         goto out;
12406         }
12407         btrfs_release_path(&path);
12408
12409 reinit_data_reloc:
12410         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12411         key.type = BTRFS_ROOT_ITEM_KEY;
12412         key.offset = (u64)-1;
12413         root = btrfs_read_fs_root(fs_info, &key);
12414         if (IS_ERR(root)) {
12415                 fprintf(stderr, "Error reading data reloc tree\n");
12416                 ret = PTR_ERR(root);
12417                 goto out;
12418         }
12419         record_root_in_trans(trans, root);
12420         ret = btrfs_fsck_reinit_root(trans, root, 0);
12421         if (ret)
12422                 goto out;
12423         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12424 out:
12425         btrfs_release_path(&path);
12426         return ret;
12427 }
12428
12429 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12430                               struct btrfs_fs_info *fs_info)
12431 {
12432         u64 start = 0;
12433         int ret;
12434
12435         /*
12436          * The only reason we don't do this is because right now we're just
12437          * walking the trees we find and pinning down their bytes, we don't look
12438          * at any of the leaves.  In order to do mixed groups we'd have to check
12439          * the leaves of any fs roots and pin down the bytes for any file
12440          * extents we find.  Not hard but why do it if we don't have to?
12441          */
12442         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12443                 fprintf(stderr, "We don't support re-initing the extent tree "
12444                         "for mixed block groups yet, please notify a btrfs "
12445                         "developer you want to do this so they can add this "
12446                         "functionality.\n");
12447                 return -EINVAL;
12448         }
12449
12450         /*
12451          * first we need to walk all of the trees except the extent tree and pin
12452          * down the bytes that are in use so we don't overwrite any existing
12453          * metadata.
12454          */
12455         ret = pin_metadata_blocks(fs_info);
12456         if (ret) {
12457                 fprintf(stderr, "error pinning down used bytes\n");
12458                 return ret;
12459         }
12460
12461         /*
12462          * Need to drop all the block groups since we're going to recreate all
12463          * of them again.
12464          */
12465         btrfs_free_block_groups(fs_info);
12466         ret = reset_block_groups(fs_info);
12467         if (ret) {
12468                 fprintf(stderr, "error resetting the block groups\n");
12469                 return ret;
12470         }
12471
12472         /* Ok we can allocate now, reinit the extent root */
12473         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12474         if (ret) {
12475                 fprintf(stderr, "extent root initialization failed\n");
12476                 /*
12477                  * When the transaction code is updated we should end the
12478                  * transaction, but for now progs only knows about commit so
12479                  * just return an error.
12480                  */
12481                 return ret;
12482         }
12483
12484         /*
12485          * Now we have all the in-memory block groups setup so we can make
12486          * allocations properly, and the metadata we care about is safe since we
12487          * pinned all of it above.
12488          */
12489         while (1) {
12490                 struct btrfs_block_group_cache *cache;
12491
12492                 cache = btrfs_lookup_first_block_group(fs_info, start);
12493                 if (!cache)
12494                         break;
12495                 start = cache->key.objectid + cache->key.offset;
12496                 ret = btrfs_insert_item(trans, fs_info->extent_root,
12497                                         &cache->key, &cache->item,
12498                                         sizeof(cache->item));
12499                 if (ret) {
12500                         fprintf(stderr, "Error adding block group\n");
12501                         return ret;
12502                 }
12503                 btrfs_extent_post_op(trans, fs_info->extent_root);
12504         }
12505
12506         ret = reset_balance(trans, fs_info);
12507         if (ret)
12508                 fprintf(stderr, "error resetting the pending balance\n");
12509
12510         return ret;
12511 }
12512
12513 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12514 {
12515         struct btrfs_path path;
12516         struct btrfs_trans_handle *trans;
12517         struct btrfs_key key;
12518         int ret;
12519
12520         printf("Recowing metadata block %llu\n", eb->start);
12521         key.objectid = btrfs_header_owner(eb);
12522         key.type = BTRFS_ROOT_ITEM_KEY;
12523         key.offset = (u64)-1;
12524
12525         root = btrfs_read_fs_root(root->fs_info, &key);
12526         if (IS_ERR(root)) {
12527                 fprintf(stderr, "Couldn't find owner root %llu\n",
12528                         key.objectid);
12529                 return PTR_ERR(root);
12530         }
12531
12532         trans = btrfs_start_transaction(root, 1);
12533         if (IS_ERR(trans))
12534                 return PTR_ERR(trans);
12535
12536         btrfs_init_path(&path);
12537         path.lowest_level = btrfs_header_level(eb);
12538         if (path.lowest_level)
12539                 btrfs_node_key_to_cpu(eb, &key, 0);
12540         else
12541                 btrfs_item_key_to_cpu(eb, &key, 0);
12542
12543         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12544         btrfs_commit_transaction(trans, root);
12545         btrfs_release_path(&path);
12546         return ret;
12547 }
12548
12549 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12550 {
12551         struct btrfs_path path;
12552         struct btrfs_trans_handle *trans;
12553         struct btrfs_key key;
12554         int ret;
12555
12556         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12557                bad->key.type, bad->key.offset);
12558         key.objectid = bad->root_id;
12559         key.type = BTRFS_ROOT_ITEM_KEY;
12560         key.offset = (u64)-1;
12561
12562         root = btrfs_read_fs_root(root->fs_info, &key);
12563         if (IS_ERR(root)) {
12564                 fprintf(stderr, "Couldn't find owner root %llu\n",
12565                         key.objectid);
12566                 return PTR_ERR(root);
12567         }
12568
12569         trans = btrfs_start_transaction(root, 1);
12570         if (IS_ERR(trans))
12571                 return PTR_ERR(trans);
12572
12573         btrfs_init_path(&path);
12574         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12575         if (ret) {
12576                 if (ret > 0)
12577                         ret = 0;
12578                 goto out;
12579         }
12580         ret = btrfs_del_item(trans, root, &path);
12581 out:
12582         btrfs_commit_transaction(trans, root);
12583         btrfs_release_path(&path);
12584         return ret;
12585 }
12586
12587 static int zero_log_tree(struct btrfs_root *root)
12588 {
12589         struct btrfs_trans_handle *trans;
12590         int ret;
12591
12592         trans = btrfs_start_transaction(root, 1);
12593         if (IS_ERR(trans)) {
12594                 ret = PTR_ERR(trans);
12595                 return ret;
12596         }
12597         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12598         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12599         ret = btrfs_commit_transaction(trans, root);
12600         return ret;
12601 }
12602
12603 static int populate_csum(struct btrfs_trans_handle *trans,
12604                          struct btrfs_root *csum_root, char *buf, u64 start,
12605                          u64 len)
12606 {
12607         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12608         u64 offset = 0;
12609         u64 sectorsize;
12610         int ret = 0;
12611
12612         while (offset < len) {
12613                 sectorsize = fs_info->sectorsize;
12614                 ret = read_extent_data(fs_info, buf, start + offset,
12615                                        &sectorsize, 0);
12616                 if (ret)
12617                         break;
12618                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12619                                             start + offset, buf, sectorsize);
12620                 if (ret)
12621                         break;
12622                 offset += sectorsize;
12623         }
12624         return ret;
12625 }
12626
12627 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12628                                       struct btrfs_root *csum_root,
12629                                       struct btrfs_root *cur_root)
12630 {
12631         struct btrfs_path path;
12632         struct btrfs_key key;
12633         struct extent_buffer *node;
12634         struct btrfs_file_extent_item *fi;
12635         char *buf = NULL;
12636         u64 start = 0;
12637         u64 len = 0;
12638         int slot = 0;
12639         int ret = 0;
12640
12641         buf = malloc(cur_root->fs_info->sectorsize);
12642         if (!buf)
12643                 return -ENOMEM;
12644
12645         btrfs_init_path(&path);
12646         key.objectid = 0;
12647         key.offset = 0;
12648         key.type = 0;
12649         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12650         if (ret < 0)
12651                 goto out;
12652         /* Iterate all regular file extents and fill its csum */
12653         while (1) {
12654                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12655
12656                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12657                         goto next;
12658                 node = path.nodes[0];
12659                 slot = path.slots[0];
12660                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12661                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12662                         goto next;
12663                 start = btrfs_file_extent_disk_bytenr(node, fi);
12664                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12665
12666                 ret = populate_csum(trans, csum_root, buf, start, len);
12667                 if (ret == -EEXIST)
12668                         ret = 0;
12669                 if (ret < 0)
12670                         goto out;
12671 next:
12672                 /*
12673                  * TODO: if next leaf is corrupted, jump to nearest next valid
12674                  * leaf.
12675                  */
12676                 ret = btrfs_next_item(cur_root, &path);
12677                 if (ret < 0)
12678                         goto out;
12679                 if (ret > 0) {
12680                         ret = 0;
12681                         goto out;
12682                 }
12683         }
12684
12685 out:
12686         btrfs_release_path(&path);
12687         free(buf);
12688         return ret;
12689 }
12690
12691 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12692                                   struct btrfs_root *csum_root)
12693 {
12694         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12695         struct btrfs_path path;
12696         struct btrfs_root *tree_root = fs_info->tree_root;
12697         struct btrfs_root *cur_root;
12698         struct extent_buffer *node;
12699         struct btrfs_key key;
12700         int slot = 0;
12701         int ret = 0;
12702
12703         btrfs_init_path(&path);
12704         key.objectid = BTRFS_FS_TREE_OBJECTID;
12705         key.offset = 0;
12706         key.type = BTRFS_ROOT_ITEM_KEY;
12707         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12708         if (ret < 0)
12709                 goto out;
12710         if (ret > 0) {
12711                 ret = -ENOENT;
12712                 goto out;
12713         }
12714
12715         while (1) {
12716                 node = path.nodes[0];
12717                 slot = path.slots[0];
12718                 btrfs_item_key_to_cpu(node, &key, slot);
12719                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12720                         goto out;
12721                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12722                         goto next;
12723                 if (!is_fstree(key.objectid))
12724                         goto next;
12725                 key.offset = (u64)-1;
12726
12727                 cur_root = btrfs_read_fs_root(fs_info, &key);
12728                 if (IS_ERR(cur_root) || !cur_root) {
12729                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12730                                 key.objectid);
12731                         goto out;
12732                 }
12733                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12734                                 cur_root);
12735                 if (ret < 0)
12736                         goto out;
12737 next:
12738                 ret = btrfs_next_item(tree_root, &path);
12739                 if (ret > 0) {
12740                         ret = 0;
12741                         goto out;
12742                 }
12743                 if (ret < 0)
12744                         goto out;
12745         }
12746
12747 out:
12748         btrfs_release_path(&path);
12749         return ret;
12750 }
12751
12752 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12753                                       struct btrfs_root *csum_root)
12754 {
12755         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12756         struct btrfs_path path;
12757         struct btrfs_extent_item *ei;
12758         struct extent_buffer *leaf;
12759         char *buf;
12760         struct btrfs_key key;
12761         int ret;
12762
12763         btrfs_init_path(&path);
12764         key.objectid = 0;
12765         key.type = BTRFS_EXTENT_ITEM_KEY;
12766         key.offset = 0;
12767         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12768         if (ret < 0) {
12769                 btrfs_release_path(&path);
12770                 return ret;
12771         }
12772
12773         buf = malloc(csum_root->fs_info->sectorsize);
12774         if (!buf) {
12775                 btrfs_release_path(&path);
12776                 return -ENOMEM;
12777         }
12778
12779         while (1) {
12780                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12781                         ret = btrfs_next_leaf(extent_root, &path);
12782                         if (ret < 0)
12783                                 break;
12784                         if (ret) {
12785                                 ret = 0;
12786                                 break;
12787                         }
12788                 }
12789                 leaf = path.nodes[0];
12790
12791                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12792                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12793                         path.slots[0]++;
12794                         continue;
12795                 }
12796
12797                 ei = btrfs_item_ptr(leaf, path.slots[0],
12798                                     struct btrfs_extent_item);
12799                 if (!(btrfs_extent_flags(leaf, ei) &
12800                       BTRFS_EXTENT_FLAG_DATA)) {
12801                         path.slots[0]++;
12802                         continue;
12803                 }
12804
12805                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12806                                     key.offset);
12807                 if (ret)
12808                         break;
12809                 path.slots[0]++;
12810         }
12811
12812         btrfs_release_path(&path);
12813         free(buf);
12814         return ret;
12815 }
12816
/*
 * Recalculate the checksums and put them into the csum tree.
 *
 * An extent tree init wipes out all extent info, so in that case the extent
 * tree can't be used as the source.  When @search_fs_tree is set, the
 * fs/subvolume trees are used to find the data to checksum; otherwise the
 * extent tree is.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
12833
12834 static void free_roots_info_cache(void)
12835 {
12836         if (!roots_info_cache)
12837                 return;
12838
12839         while (!cache_tree_empty(roots_info_cache)) {
12840                 struct cache_extent *entry;
12841                 struct root_item_info *rii;
12842
12843                 entry = first_cache_extent(roots_info_cache);
12844                 if (!entry)
12845                         break;
12846                 remove_cache_extent(roots_info_cache, entry);
12847                 rii = container_of(entry, struct root_item_info, cache_extent);
12848                 free(rii);
12849         }
12850
12851         free(roots_info_cache);
12852         roots_info_cache = NULL;
12853 }
12854
12855 static int build_roots_info_cache(struct btrfs_fs_info *info)
12856 {
12857         int ret = 0;
12858         struct btrfs_key key;
12859         struct extent_buffer *leaf;
12860         struct btrfs_path path;
12861
12862         if (!roots_info_cache) {
12863                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12864                 if (!roots_info_cache)
12865                         return -ENOMEM;
12866                 cache_tree_init(roots_info_cache);
12867         }
12868
12869         btrfs_init_path(&path);
12870         key.objectid = 0;
12871         key.type = BTRFS_EXTENT_ITEM_KEY;
12872         key.offset = 0;
12873         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12874         if (ret < 0)
12875                 goto out;
12876         leaf = path.nodes[0];
12877
12878         while (1) {
12879                 struct btrfs_key found_key;
12880                 struct btrfs_extent_item *ei;
12881                 struct btrfs_extent_inline_ref *iref;
12882                 int slot = path.slots[0];
12883                 int type;
12884                 u64 flags;
12885                 u64 root_id;
12886                 u8 level;
12887                 struct cache_extent *entry;
12888                 struct root_item_info *rii;
12889
12890                 if (slot >= btrfs_header_nritems(leaf)) {
12891                         ret = btrfs_next_leaf(info->extent_root, &path);
12892                         if (ret < 0) {
12893                                 break;
12894                         } else if (ret) {
12895                                 ret = 0;
12896                                 break;
12897                         }
12898                         leaf = path.nodes[0];
12899                         slot = path.slots[0];
12900                 }
12901
12902                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12903
12904                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12905                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12906                         goto next;
12907
12908                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12909                 flags = btrfs_extent_flags(leaf, ei);
12910
12911                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12912                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12913                         goto next;
12914
12915                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12916                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12917                         level = found_key.offset;
12918                 } else {
12919                         struct btrfs_tree_block_info *binfo;
12920
12921                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12922                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12923                         level = btrfs_tree_block_level(leaf, binfo);
12924                 }
12925
12926                 /*
12927                  * For a root extent, it must be of the following type and the
12928                  * first (and only one) iref in the item.
12929                  */
12930                 type = btrfs_extent_inline_ref_type(leaf, iref);
12931                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12932                         goto next;
12933
12934                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12935                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12936                 if (!entry) {
12937                         rii = malloc(sizeof(struct root_item_info));
12938                         if (!rii) {
12939                                 ret = -ENOMEM;
12940                                 goto out;
12941                         }
12942                         rii->cache_extent.start = root_id;
12943                         rii->cache_extent.size = 1;
12944                         rii->level = (u8)-1;
12945                         entry = &rii->cache_extent;
12946                         ret = insert_cache_extent(roots_info_cache, entry);
12947                         ASSERT(ret == 0);
12948                 } else {
12949                         rii = container_of(entry, struct root_item_info,
12950                                            cache_extent);
12951                 }
12952
12953                 ASSERT(rii->cache_extent.start == root_id);
12954                 ASSERT(rii->cache_extent.size == 1);
12955
12956                 if (level > rii->level || rii->level == (u8)-1) {
12957                         rii->level = level;
12958                         rii->bytenr = found_key.objectid;
12959                         rii->gen = btrfs_extent_generation(leaf, ei);
12960                         rii->node_count = 1;
12961                 } else if (level == rii->level) {
12962                         rii->node_count++;
12963                 }
12964 next:
12965                 path.slots[0]++;
12966         }
12967
12968 out:
12969         btrfs_release_path(&path);
12970
12971         return ret;
12972 }
12973
12974 static int maybe_repair_root_item(struct btrfs_path *path,
12975                                   const struct btrfs_key *root_key,
12976                                   const int read_only_mode)
12977 {
12978         const u64 root_id = root_key->objectid;
12979         struct cache_extent *entry;
12980         struct root_item_info *rii;
12981         struct btrfs_root_item ri;
12982         unsigned long offset;
12983
12984         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12985         if (!entry) {
12986                 fprintf(stderr,
12987                         "Error: could not find extent items for root %llu\n",
12988                         root_key->objectid);
12989                 return -ENOENT;
12990         }
12991
12992         rii = container_of(entry, struct root_item_info, cache_extent);
12993         ASSERT(rii->cache_extent.start == root_id);
12994         ASSERT(rii->cache_extent.size == 1);
12995
12996         if (rii->node_count != 1) {
12997                 fprintf(stderr,
12998                         "Error: could not find btree root extent for root %llu\n",
12999                         root_id);
13000                 return -ENOENT;
13001         }
13002
13003         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13004         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13005
13006         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13007             btrfs_root_level(&ri) != rii->level ||
13008             btrfs_root_generation(&ri) != rii->gen) {
13009
13010                 /*
13011                  * If we're in repair mode but our caller told us to not update
13012                  * the root item, i.e. just check if it needs to be updated, don't
13013                  * print this message, since the caller will call us again shortly
13014                  * for the same root item without read only mode (the caller will
13015                  * open a transaction first).
13016                  */
13017                 if (!(read_only_mode && repair))
13018                         fprintf(stderr,
13019                                 "%sroot item for root %llu,"
13020                                 " current bytenr %llu, current gen %llu, current level %u,"
13021                                 " new bytenr %llu, new gen %llu, new level %u\n",
13022                                 (read_only_mode ? "" : "fixing "),
13023                                 root_id,
13024                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13025                                 btrfs_root_level(&ri),
13026                                 rii->bytenr, rii->gen, rii->level);
13027
13028                 if (btrfs_root_generation(&ri) > rii->gen) {
13029                         fprintf(stderr,
13030                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13031                                 root_id, btrfs_root_generation(&ri), rii->gen);
13032                         return -EINVAL;
13033                 }
13034
13035                 if (!read_only_mode) {
13036                         btrfs_set_root_bytenr(&ri, rii->bytenr);
13037                         btrfs_set_root_level(&ri, rii->level);
13038                         btrfs_set_root_generation(&ri, rii->gen);
13039                         write_extent_buffer(path->nodes[0], &ri,
13040                                             offset, sizeof(ri));
13041                 }
13042
13043                 return 1;
13044         }
13045
13046         return 0;
13047 }
13048
13049 /*
13050  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13051  * caused read-only snapshots to be corrupted if they were created at a moment
13052  * when the source subvolume/snapshot had orphan items. The issue was that the
13053  * on-disk root items became incorrect, referring to the pre orphan cleanup root
13054  * node instead of the post orphan cleanup root node.
13055  * So this function, and its callees, just detects and fixes those cases. Even
13056  * though the regression was for read-only snapshots, this function applies to
13057  * any snapshot/subvolume root.
13058  * This must be run before any other repair code - not doing it so, makes other
13059  * repair code delete or modify backrefs in the extent tree for example, which
13060  * will result in an inconsistent fs after repairing the root items.
13061  */
13062 static int repair_root_items(struct btrfs_fs_info *info)
13063 {
13064         struct btrfs_path path;
13065         struct btrfs_key key;
13066         struct extent_buffer *leaf;
13067         struct btrfs_trans_handle *trans = NULL;
13068         int ret = 0;
13069         int bad_roots = 0;
13070         int need_trans = 0;
13071
13072         btrfs_init_path(&path);
13073
13074         ret = build_roots_info_cache(info);
13075         if (ret)
13076                 goto out;
13077
13078         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13079         key.type = BTRFS_ROOT_ITEM_KEY;
13080         key.offset = 0;
13081
13082 again:
13083         /*
13084          * Avoid opening and committing transactions if a leaf doesn't have
13085          * any root items that need to be fixed, so that we avoid rotating
13086          * backup roots unnecessarily.
13087          */
13088         if (need_trans) {
13089                 trans = btrfs_start_transaction(info->tree_root, 1);
13090                 if (IS_ERR(trans)) {
13091                         ret = PTR_ERR(trans);
13092                         goto out;
13093                 }
13094         }
13095
13096         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13097                                 0, trans ? 1 : 0);
13098         if (ret < 0)
13099                 goto out;
13100         leaf = path.nodes[0];
13101
13102         while (1) {
13103                 struct btrfs_key found_key;
13104
13105                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13106                         int no_more_keys = find_next_key(&path, &key);
13107
13108                         btrfs_release_path(&path);
13109                         if (trans) {
13110                                 ret = btrfs_commit_transaction(trans,
13111                                                                info->tree_root);
13112                                 trans = NULL;
13113                                 if (ret < 0)
13114                                         goto out;
13115                         }
13116                         need_trans = 0;
13117                         if (no_more_keys)
13118                                 break;
13119                         goto again;
13120                 }
13121
13122                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13123
13124                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13125                         goto next;
13126                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13127                         goto next;
13128
13129                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13130                 if (ret < 0)
13131                         goto out;
13132                 if (ret) {
13133                         if (!trans && repair) {
13134                                 need_trans = 1;
13135                                 key = found_key;
13136                                 btrfs_release_path(&path);
13137                                 goto again;
13138                         }
13139                         bad_roots++;
13140                 }
13141 next:
13142                 path.slots[0]++;
13143         }
13144         ret = 0;
13145 out:
13146         free_roots_info_cache();
13147         btrfs_release_path(&path);
13148         if (trans)
13149                 btrfs_commit_transaction(trans, info->tree_root);
13150         if (ret < 0)
13151                 return ret;
13152
13153         return bad_roots;
13154 }
13155
13156 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13157 {
13158         struct btrfs_trans_handle *trans;
13159         struct btrfs_block_group_cache *bg_cache;
13160         u64 current = 0;
13161         int ret = 0;
13162
13163         /* Clear all free space cache inodes and its extent data */
13164         while (1) {
13165                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13166                 if (!bg_cache)
13167                         break;
13168                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13169                 if (ret < 0)
13170                         return ret;
13171                 current = bg_cache->key.objectid + bg_cache->key.offset;
13172         }
13173
13174         /* Don't forget to set cache_generation to -1 */
13175         trans = btrfs_start_transaction(fs_info->tree_root, 0);
13176         if (IS_ERR(trans)) {
13177                 error("failed to update super block cache generation");
13178                 return PTR_ERR(trans);
13179         }
13180         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13181         btrfs_commit_transaction(trans, fs_info->tree_root);
13182
13183         return ret;
13184 }
13185
13186 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13187                 int clear_version)
13188 {
13189         int ret = 0;
13190
13191         if (clear_version == 1) {
13192                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13193                         error(
13194                 "free space cache v2 detected, use --clear-space-cache v2");
13195                         ret = 1;
13196                         goto close_out;
13197                 }
13198                 printf("Clearing free space cache\n");
13199                 ret = clear_free_space_cache(fs_info);
13200                 if (ret) {
13201                         error("failed to clear free space cache");
13202                         ret = 1;
13203                 } else {
13204                         printf("Free space cache cleared\n");
13205                 }
13206         } else if (clear_version == 2) {
13207                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13208                         printf("no free space cache v2 to clear\n");
13209                         ret = 0;
13210                         goto close_out;
13211                 }
13212                 printf("Clear free space cache v2\n");
13213                 ret = btrfs_clear_free_space_tree(fs_info);
13214                 if (ret) {
13215                         error("failed to clear free space cache v2: %d", ret);
13216                         ret = 1;
13217                 } else {
13218                         printf("free space cache v2 cleared\n");
13219                 }
13220         }
13221 close_out:
13222         return ret;
13223 }
13224
/*
 * Help text for "btrfs check", as a NULL-terminated list of lines: synopsis,
 * one-line summary, long description, an empty separator line, then the
 * option descriptions.
 */
const char * const cmd_check_usage[] = {
        /* Synopsis and one-line summary */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",

        /* Long description */
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",

        /* Superblock/root selection and safety switches */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",

        /* Operation modes */
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",

        /* Additional checks and reports */
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",

        /* Miscellaneous */
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
13256
13257 int cmd_check(int argc, char **argv)
13258 {
13259         struct cache_tree root_cache;
13260         struct btrfs_root *root;
13261         struct btrfs_fs_info *info;
13262         u64 bytenr = 0;
13263         u64 subvolid = 0;
13264         u64 tree_root_bytenr = 0;
13265         u64 chunk_root_bytenr = 0;
13266         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13267         int ret = 0;
13268         int err = 0;
13269         u64 num;
13270         int init_csum_tree = 0;
13271         int readonly = 0;
13272         int clear_space_cache = 0;
13273         int qgroup_report = 0;
13274         int qgroups_repaired = 0;
13275         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13276         int force = 0;
13277
13278         while(1) {
13279                 int c;
13280                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13281                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13282                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13283                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13284                         GETOPT_VAL_FORCE };
13285                 static const struct option long_options[] = {
13286                         { "super", required_argument, NULL, 's' },
13287                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13288                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13289                         { "init-csum-tree", no_argument, NULL,
13290                                 GETOPT_VAL_INIT_CSUM },
13291                         { "init-extent-tree", no_argument, NULL,
13292                                 GETOPT_VAL_INIT_EXTENT },
13293                         { "check-data-csum", no_argument, NULL,
13294                                 GETOPT_VAL_CHECK_CSUM },
13295                         { "backup", no_argument, NULL, 'b' },
13296                         { "subvol-extents", required_argument, NULL, 'E' },
13297                         { "qgroup-report", no_argument, NULL, 'Q' },
13298                         { "tree-root", required_argument, NULL, 'r' },
13299                         { "chunk-root", required_argument, NULL,
13300                                 GETOPT_VAL_CHUNK_TREE },
13301                         { "progress", no_argument, NULL, 'p' },
13302                         { "mode", required_argument, NULL,
13303                                 GETOPT_VAL_MODE },
13304                         { "clear-space-cache", required_argument, NULL,
13305                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
13306                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13307                         { NULL, 0, NULL, 0}
13308                 };
13309
13310                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13311                 if (c < 0)
13312                         break;
13313                 switch(c) {
13314                         case 'a': /* ignored */ break;
13315                         case 'b':
13316                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13317                                 break;
13318                         case 's':
13319                                 num = arg_strtou64(optarg);
13320                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13321                                         error(
13322                                         "super mirror should be less than %d",
13323                                                 BTRFS_SUPER_MIRROR_MAX);
13324                                         exit(1);
13325                                 }
13326                                 bytenr = btrfs_sb_offset(((int)num));
13327                                 printf("using SB copy %llu, bytenr %llu\n", num,
13328                                        (unsigned long long)bytenr);
13329                                 break;
13330                         case 'Q':
13331                                 qgroup_report = 1;
13332                                 break;
13333                         case 'E':
13334                                 subvolid = arg_strtou64(optarg);
13335                                 break;
13336                         case 'r':
13337                                 tree_root_bytenr = arg_strtou64(optarg);
13338                                 break;
13339                         case GETOPT_VAL_CHUNK_TREE:
13340                                 chunk_root_bytenr = arg_strtou64(optarg);
13341                                 break;
13342                         case 'p':
13343                                 ctx.progress_enabled = true;
13344                                 break;
13345                         case '?':
13346                         case 'h':
13347                                 usage(cmd_check_usage);
13348                         case GETOPT_VAL_REPAIR:
13349                                 printf("enabling repair mode\n");
13350                                 repair = 1;
13351                                 ctree_flags |= OPEN_CTREE_WRITES;
13352                                 break;
13353                         case GETOPT_VAL_READONLY:
13354                                 readonly = 1;
13355                                 break;
13356                         case GETOPT_VAL_INIT_CSUM:
13357                                 printf("Creating a new CRC tree\n");
13358                                 init_csum_tree = 1;
13359                                 repair = 1;
13360                                 ctree_flags |= OPEN_CTREE_WRITES;
13361                                 break;
13362                         case GETOPT_VAL_INIT_EXTENT:
13363                                 init_extent_tree = 1;
13364                                 ctree_flags |= (OPEN_CTREE_WRITES |
13365                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
13366                                 repair = 1;
13367                                 break;
13368                         case GETOPT_VAL_CHECK_CSUM:
13369                                 check_data_csum = 1;
13370                                 break;
13371                         case GETOPT_VAL_MODE:
13372                                 check_mode = parse_check_mode(optarg);
13373                                 if (check_mode == CHECK_MODE_UNKNOWN) {
13374                                         error("unknown mode: %s", optarg);
13375                                         exit(1);
13376                                 }
13377                                 break;
13378                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
13379                                 if (strcmp(optarg, "v1") == 0) {
13380                                         clear_space_cache = 1;
13381                                 } else if (strcmp(optarg, "v2") == 0) {
13382                                         clear_space_cache = 2;
13383                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13384                                 } else {
13385                                         error(
13386                 "invalid argument to --clear-space-cache, must be v1 or v2");
13387                                         exit(1);
13388                                 }
13389                                 ctree_flags |= OPEN_CTREE_WRITES;
13390                                 break;
13391                         case GETOPT_VAL_FORCE:
13392                                 force = 1;
13393                                 break;
13394                 }
13395         }
13396
13397         if (check_argc_exact(argc - optind, 1))
13398                 usage(cmd_check_usage);
13399
13400         if (ctx.progress_enabled) {
13401                 ctx.tp = TASK_NOTHING;
13402                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13403         }
13404
13405         /* This check is the only reason for --readonly to exist */
13406         if (readonly && repair) {
13407                 error("repair options are not compatible with --readonly");
13408                 exit(1);
13409         }
13410
13411         /*
13412          * experimental and dangerous
13413          */
13414         if (repair && check_mode == CHECK_MODE_LOWMEM)
13415                 warning("low-memory mode repair support is only partial");
13416
13417         radix_tree_init();
13418         cache_tree_init(&root_cache);
13419
13420         ret = check_mounted(argv[optind]);
13421         if (!force) {
13422                 if (ret < 0) {
13423                         error("could not check mount status: %s",
13424                                         strerror(-ret));
13425                         err |= !!ret;
13426                         goto err_out;
13427                 } else if (ret) {
13428                         error(
13429 "%s is currently mounted, use --force if you really intend to check the filesystem",
13430                                 argv[optind]);
13431                         ret = -EBUSY;
13432                         err |= !!ret;
13433                         goto err_out;
13434                 }
13435         } else {
13436                 if (repair) {
13437                         error("repair and --force is not yet supported");
13438                         ret = 1;
13439                         err |= !!ret;
13440                         goto err_out;
13441                 }
13442                 if (ret < 0) {
13443                         warning(
13444 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13445                                 argv[optind]);
13446                 } else if (ret) {
13447                         warning(
13448                         "filesystem mounted, continuing because of --force");
13449                 }
13450                 /* A block device is mounted in exclusive mode by kernel */
13451                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13452         }
13453
13454         /* only allow partial opening under repair mode */
13455         if (repair)
13456                 ctree_flags |= OPEN_CTREE_PARTIAL;
13457
13458         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13459                                   chunk_root_bytenr, ctree_flags);
13460         if (!info) {
13461                 error("cannot open file system");
13462                 ret = -EIO;
13463                 err |= !!ret;
13464                 goto err_out;
13465         }
13466
13467         global_info = info;
13468         root = info->fs_root;
13469         uuid_unparse(info->super_copy->fsid, uuidbuf);
13470
13471         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13472
13473         /*
13474          * Check the bare minimum before starting anything else that could rely
13475          * on it, namely the tree roots, any local consistency checks
13476          */
13477         if (!extent_buffer_uptodate(info->tree_root->node) ||
13478             !extent_buffer_uptodate(info->dev_root->node) ||
13479             !extent_buffer_uptodate(info->chunk_root->node)) {
13480                 error("critical roots corrupted, unable to check the filesystem");
13481                 err |= !!ret;
13482                 ret = -EIO;
13483                 goto close_out;
13484         }
13485
13486         if (clear_space_cache) {
13487                 ret = do_clear_free_space_cache(info, clear_space_cache);
13488                 err |= !!ret;
13489                 goto close_out;
13490         }
13491
13492         /*
13493          * repair mode will force us to commit transaction which
13494          * will make us fail to load log tree when mounting.
13495          */
13496         if (repair && btrfs_super_log_root(info->super_copy)) {
13497                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13498                 if (!ret) {
13499                         ret = 1;
13500                         err |= !!ret;
13501                         goto close_out;
13502                 }
13503                 ret = zero_log_tree(root);
13504                 err |= !!ret;
13505                 if (ret) {
13506                         error("failed to zero log tree: %d", ret);
13507                         goto close_out;
13508                 }
13509         }
13510
13511         if (qgroup_report) {
13512                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13513                        uuidbuf);
13514                 ret = qgroup_verify_all(info);
13515                 err |= !!ret;
13516                 if (ret == 0)
13517                         report_qgroups(1);
13518                 goto close_out;
13519         }
13520         if (subvolid) {
13521                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13522                        subvolid, argv[optind], uuidbuf);
13523                 ret = print_extent_state(info, subvolid);
13524                 err |= !!ret;
13525                 goto close_out;
13526         }
13527
13528         if (init_extent_tree || init_csum_tree) {
13529                 struct btrfs_trans_handle *trans;
13530
13531                 trans = btrfs_start_transaction(info->extent_root, 0);
13532                 if (IS_ERR(trans)) {
13533                         error("error starting transaction");
13534                         ret = PTR_ERR(trans);
13535                         err |= !!ret;
13536                         goto close_out;
13537                 }
13538
13539                 if (init_extent_tree) {
13540                         printf("Creating a new extent tree\n");
13541                         ret = reinit_extent_tree(trans, info);
13542                         err |= !!ret;
13543                         if (ret)
13544                                 goto close_out;
13545                 }
13546
13547                 if (init_csum_tree) {
13548                         printf("Reinitialize checksum tree\n");
13549                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13550                         if (ret) {
13551                                 error("checksum tree initialization failed: %d",
13552                                                 ret);
13553                                 ret = -EIO;
13554                                 err |= !!ret;
13555                                 goto close_out;
13556                         }
13557
13558                         ret = fill_csum_tree(trans, info->csum_root,
13559                                              init_extent_tree);
13560                         err |= !!ret;
13561                         if (ret) {
13562                                 error("checksum tree refilling failed: %d", ret);
13563                                 return -EIO;
13564                         }
13565                 }
13566                 /*
13567                  * Ok now we commit and run the normal fsck, which will add
13568                  * extent entries for all of the items it finds.
13569                  */
13570                 ret = btrfs_commit_transaction(trans, info->extent_root);
13571                 err |= !!ret;
13572                 if (ret)
13573                         goto close_out;
13574         }
13575         if (!extent_buffer_uptodate(info->extent_root->node)) {
13576                 error("critical: extent_root, unable to check the filesystem");
13577                 ret = -EIO;
13578                 err |= !!ret;
13579                 goto close_out;
13580         }
13581         if (!extent_buffer_uptodate(info->csum_root->node)) {
13582                 error("critical: csum_root, unable to check the filesystem");
13583                 ret = -EIO;
13584                 err |= !!ret;
13585                 goto close_out;
13586         }
13587
13588         ret = do_check_chunks_and_extents(info);
13589         err |= !!ret;
13590         if (ret)
13591                 error(
13592                 "errors found in extent allocation tree or chunk allocation");
13593
13594         ret = repair_root_items(info);
13595         err |= !!ret;
13596         if (ret < 0) {
13597                 error("failed to repair root items: %s", strerror(-ret));
13598                 goto close_out;
13599         }
13600         if (repair) {
13601                 fprintf(stderr, "Fixed %d roots.\n", ret);
13602                 ret = 0;
13603         } else if (ret > 0) {
13604                 fprintf(stderr,
13605                        "Found %d roots with an outdated root item.\n",
13606                        ret);
13607                 fprintf(stderr,
13608                         "Please run a filesystem check with the option --repair to fix them.\n");
13609                 ret = 1;
13610                 err |= !!ret;
13611                 goto close_out;
13612         }
13613
13614         if (!ctx.progress_enabled) {
13615                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13616                         fprintf(stderr, "checking free space tree\n");
13617                 else
13618                         fprintf(stderr, "checking free space cache\n");
13619         }
13620         ret = check_space_cache(root);
13621         err |= !!ret;
13622         if (ret) {
13623                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13624                         error("errors found in free space tree");
13625                 else
13626                         error("errors found in free space cache");
13627                 goto out;
13628         }
13629
13630         /*
13631          * We used to have to have these hole extents in between our real
13632          * extents so if we don't have this flag set we need to make sure there
13633          * are no gaps in the file extents for inodes, otherwise we can just
13634          * ignore it when this happens.
13635          */
13636         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13637         ret = do_check_fs_roots(info, &root_cache);
13638         err |= !!ret;
13639         if (ret) {
13640                 error("errors found in fs roots");
13641                 goto out;
13642         }
13643
13644         fprintf(stderr, "checking csums\n");
13645         ret = check_csums(root);
13646         err |= !!ret;
13647         if (ret) {
13648                 error("errors found in csum tree");
13649                 goto out;
13650         }
13651
13652         fprintf(stderr, "checking root refs\n");
13653         /* For low memory mode, check_fs_roots_v2 handles root refs */
13654         if (check_mode != CHECK_MODE_LOWMEM) {
13655                 ret = check_root_refs(root, &root_cache);
13656                 err |= !!ret;
13657                 if (ret) {
13658                         error("errors found in root refs");
13659                         goto out;
13660                 }
13661         }
13662
13663         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13664                 struct extent_buffer *eb;
13665
13666                 eb = list_first_entry(&root->fs_info->recow_ebs,
13667                                       struct extent_buffer, recow);
13668                 list_del_init(&eb->recow);
13669                 ret = recow_extent_buffer(root, eb);
13670                 err |= !!ret;
13671                 if (ret) {
13672                         error("fails to fix transid errors");
13673                         break;
13674                 }
13675         }
13676
13677         while (!list_empty(&delete_items)) {
13678                 struct bad_item *bad;
13679
13680                 bad = list_first_entry(&delete_items, struct bad_item, list);
13681                 list_del_init(&bad->list);
13682                 if (repair) {
13683                         ret = delete_bad_item(root, bad);
13684                         err |= !!ret;
13685                 }
13686                 free(bad);
13687         }
13688
13689         if (info->quota_enabled) {
13690                 fprintf(stderr, "checking quota groups\n");
13691                 ret = qgroup_verify_all(info);
13692                 err |= !!ret;
13693                 if (ret) {
13694                         error("failed to check quota groups");
13695                         goto out;
13696                 }
13697                 report_qgroups(0);
13698                 ret = repair_qgroups(info, &qgroups_repaired);
13699                 err |= !!ret;
13700                 if (err) {
13701                         error("failed to repair quota groups");
13702                         goto out;
13703                 }
13704                 ret = 0;
13705         }
13706
13707         if (!list_empty(&root->fs_info->recow_ebs)) {
13708                 error("transid errors in file system");
13709                 ret = 1;
13710                 err |= !!ret;
13711         }
13712 out:
13713         printf("found %llu bytes used, ",
13714                (unsigned long long)bytes_used);
13715         if (err)
13716                 printf("error(s) found\n");
13717         else
13718                 printf("no error found\n");
13719         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13720         printf("total tree bytes: %llu\n",
13721                (unsigned long long)total_btree_bytes);
13722         printf("total fs tree bytes: %llu\n",
13723                (unsigned long long)total_fs_tree_bytes);
13724         printf("total extent tree bytes: %llu\n",
13725                (unsigned long long)total_extent_tree_bytes);
13726         printf("btree space waste bytes: %llu\n",
13727                (unsigned long long)btree_space_waste);
13728         printf("file data blocks allocated: %llu\n referenced %llu\n",
13729                 (unsigned long long)data_bytes_allocated,
13730                 (unsigned long long)data_bytes_referenced);
13731
13732         free_qgroup_counts();
13733         free_root_recs_tree(&root_cache);
13734 close_out:
13735         close_ctree(root);
13736 err_out:
13737         if (ctx.progress_enabled)
13738                 task_deinit(ctx.info);
13739
13740         return err;
13741 }