btrfs-progs: check: change find_inode_ref()'s arg
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases reported by the progress indicator.  The enumerators other than
 * TASK_NOTHING are used as indexes into the label table of
 * print_status_check(), hence TASK_NOTHING has to stay last.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING,	/* sentinel: must remain the final enumerator */
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementations selectable by name (see parse_check_mode()).
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for a name it does
 * not recognize; CHECK_MODE_DEFAULT aliases the original mode.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode currently in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
87 struct extent_backref {
88         struct rb_node node;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
/* Item-level error bits; note that bit 0 is not assigned in this set. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
133
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
135 {
136         return container_of(back, struct data_backref, node);
137 }
138
139 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
140 {
141         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
142         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
143         struct data_backref *back1 = to_data_backref(ext1);
144         struct data_backref *back2 = to_data_backref(ext2);
145
146         WARN_ON(!ext1->is_data);
147         WARN_ON(!ext2->is_data);
148
149         /* parent and root are a union, so this covers both */
150         if (back1->parent > back2->parent)
151                 return 1;
152         if (back1->parent < back2->parent)
153                 return -1;
154
155         /* This is a full backref and the parents match. */
156         if (back1->node.full_backref)
157                 return 0;
158
159         if (back1->owner > back2->owner)
160                 return 1;
161         if (back1->owner < back2->owner)
162                 return -1;
163
164         if (back1->offset > back2->offset)
165                 return 1;
166         if (back1->offset < back2->offset)
167                 return -1;
168
169         if (back1->found_ref && back2->found_ref) {
170                 if (back1->disk_bytenr > back2->disk_bytenr)
171                         return 1;
172                 if (back1->disk_bytenr < back2->disk_bytenr)
173                         return -1;
174
175                 if (back1->bytes > back2->bytes)
176                         return 1;
177                 if (back1->bytes < back2->bytes)
178                         return -1;
179         }
180
181         return 0;
182 }
183
184 /*
185  * Much like data_backref, just removed the undetermined members
186  * and change it to use list_head.
187  * During extent scan, it is stored in root->orphan_data_extent.
188  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
189  */
190 struct orphan_data_extent {
191         struct list_head list;
192         u64 root;
193         u64 objectid;
194         u64 offset;
195         u64 disk_bytenr;
196         u64 disk_len;
197 };
198
199 struct tree_backref {
200         struct extent_backref node;
201         union {
202                 u64 parent;
203                 u64 root;
204         };
205 };
206
207 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
208 {
209         return container_of(back, struct tree_backref, node);
210 }
211
212 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
213 {
214         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
215         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
216         struct tree_backref *back1 = to_tree_backref(ext1);
217         struct tree_backref *back2 = to_tree_backref(ext2);
218
219         WARN_ON(ext1->is_data);
220         WARN_ON(ext2->is_data);
221
222         /* parent and root are a union, so this covers both */
223         if (back1->parent > back2->parent)
224                 return 1;
225         if (back1->parent < back2->parent)
226                 return -1;
227
228         return 0;
229 }
230
231 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
232 {
233         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
234         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
235
236         if (ext1->is_data > ext2->is_data)
237                 return 1;
238
239         if (ext1->is_data < ext2->is_data)
240                 return -1;
241
242         if (ext1->full_backref > ext2->full_backref)
243                 return 1;
244         if (ext1->full_backref < ext2->full_backref)
245                 return -1;
246
247         if (ext1->is_data)
248                 return compare_data_backref(node1, node2);
249         else
250                 return compare_tree_backref(node1, node2);
251 }
252
/*
 * Explicit "not initialized" value for the two-bit field
 * extent_record::flag_block_full_backref.
 */
enum {
	FLAG_UNSET = 2
};
255
256 struct extent_record {
257         struct list_head backrefs;
258         struct list_head dups;
259         struct rb_root backref_tree;
260         struct list_head list;
261         struct cache_extent cache;
262         struct btrfs_disk_key parent_key;
263         u64 start;
264         u64 max_size;
265         u64 nr;
266         u64 refs;
267         u64 extent_item_refs;
268         u64 generation;
269         u64 parent_generation;
270         u64 info_objectid;
271         u32 num_duplicates;
272         u8 info_level;
273         unsigned int flag_block_full_backref:2;
274         unsigned int found_rec:1;
275         unsigned int content_checked:1;
276         unsigned int owner_ref_checked:1;
277         unsigned int is_root:1;
278         unsigned int metadata:1;
279         unsigned int bad_full_backref:1;
280         unsigned int crossing_stripes:1;
281         unsigned int wrong_chunk_type:1;
282 };
283
284 static inline struct extent_record* to_extent_record(struct list_head *entry)
285 {
286         return container_of(entry, struct extent_record, list);
287 }
288
289 struct inode_backref {
290         struct list_head list;
291         unsigned int found_dir_item:1;
292         unsigned int found_dir_index:1;
293         unsigned int found_inode_ref:1;
294         u8 filetype;
295         u8 ref_type;
296         int errors;
297         u64 dir;
298         u64 index;
299         u16 namelen;
300         char name[0];
301 };
302
303 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
304 {
305         return list_entry(entry, struct inode_backref, list);
306 }
307
308 struct root_item_record {
309         struct list_head list;
310         u64 objectid;
311         u64 bytenr;
312         u64 last_snapshot;
313         u8 level;
314         u8 drop_level;
315         struct btrfs_key drop_key;
316 };
317
/* Back reference error bits, decoded by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
331
332 struct file_extent_hole {
333         struct rb_node node;
334         u64 start;
335         u64 len;
336 };
337
338 struct inode_record {
339         struct list_head backrefs;
340         unsigned int checked:1;
341         unsigned int merging:1;
342         unsigned int found_inode_item:1;
343         unsigned int found_dir_item:1;
344         unsigned int found_file_extent:1;
345         unsigned int found_csum_item:1;
346         unsigned int some_csum_missing:1;
347         unsigned int nodatasum:1;
348         int errors;
349
350         u64 ino;
351         u32 nlink;
352         u32 imode;
353         u64 isize;
354         u64 nbytes;
355
356         u32 found_link;
357         u64 found_size;
358         u64 extent_start;
359         u64 extent_end;
360         struct rb_root holes;
361         struct list_head orphan_extents;
362
363         u32 refs;
364 };
365
/* Inode error bits, decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
381
382 struct root_backref {
383         struct list_head list;
384         unsigned int found_dir_item:1;
385         unsigned int found_dir_index:1;
386         unsigned int found_back_ref:1;
387         unsigned int found_forward_ref:1;
388         unsigned int reachable:1;
389         int errors;
390         u64 ref_root;
391         u64 dir;
392         u64 index;
393         u16 namelen;
394         char name[0];
395 };
396
397 static inline struct root_backref* to_root_backref(struct list_head *entry)
398 {
399         return list_entry(entry, struct root_backref, list);
400 }
401
402 struct root_record {
403         struct list_head backrefs;
404         struct cache_extent cache;
405         unsigned int found_root_item:1;
406         u64 objectid;
407         u32 found_ref;
408 };
409
410 struct ptr_node {
411         struct cache_extent cache;
412         void *data;
413 };
414
415 struct shared_node {
416         struct cache_extent cache;
417         struct cache_tree root_cache;
418         struct cache_tree inode_cache;
419         struct inode_record *current;
420         u32 refs;
421 };
422
423 struct block_info {
424         u64 start;
425         u32 size;
426 };
427
428 struct walk_control {
429         struct cache_tree shared;
430         struct shared_node *nodes[BTRFS_MAX_LEVEL];
431         int active_node;
432         int root_level;
433 };
434
435 struct bad_item {
436         struct btrfs_key key;
437         u64 root_id;
438         struct list_head list;
439 };
440
441 struct extent_entry {
442         u64 bytenr;
443         u64 bytes;
444         int count;
445         int broken;
446         struct list_head list;
447 };
448
449 struct root_item_info {
450         /* level of the root */
451         u8 level;
452         /* number of nodes at this level, must be 1 for a root */
453         int node_count;
454         u64 bytenr;
455         u64 gen;
456         struct cache_extent cache_extent;
457 };
458
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about them yet, they are only used internally
 * for error classification.
 *
 * Every flag owns a distinct bit.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two conditions
 * indistinguishable once or-ed into an error mask; it now uses the first
 * free bit instead.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
475
476 static void *print_status_check(void *p)
477 {
478         struct task_ctx *priv = p;
479         const char work_indicator[] = { '.', 'o', 'O', 'o' };
480         uint32_t count = 0;
481         static char *task_position_string[] = {
482                 "checking extents",
483                 "checking free space cache",
484                 "checking fs roots",
485         };
486
487         task_period_start(priv->info, 1000 /* 1s */);
488
489         if (priv->tp == TASK_NOTHING)
490                 return NULL;
491
492         while (1) {
493                 printf("%s [%c]\r", task_position_string[priv->tp],
494                                 work_indicator[count % 4]);
495                 count++;
496                 fflush(stdout);
497                 task_period_wait(priv->info);
498         }
499         return NULL;
500 }
501
/*
 * Terminate the status line written by print_status_check().
 *
 * @p:	unused
 *
 * Writes a newline to stdout, flushes it and returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);
	return 0;
}
509
510 static enum btrfs_check_mode parse_check_mode(const char *str)
511 {
512         if (strcmp(str, "lowmem") == 0)
513                 return CHECK_MODE_LOWMEM;
514         if (strcmp(str, "orig") == 0)
515                 return CHECK_MODE_ORIGINAL;
516         if (strcmp(str, "original") == 0)
517                 return CHECK_MODE_ORIGINAL;
518
519         return CHECK_MODE_UNKNOWN;
520 }
521
522 /* Compatible function to allow reuse of old codes */
523 static u64 first_extent_gap(struct rb_root *holes)
524 {
525         struct file_extent_hole *hole;
526
527         if (RB_EMPTY_ROOT(holes))
528                 return (u64)-1;
529
530         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
531         return hole->start;
532 }
533
534 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
535 {
536         struct file_extent_hole *hole1;
537         struct file_extent_hole *hole2;
538
539         hole1 = rb_entry(node1, struct file_extent_hole, node);
540         hole2 = rb_entry(node2, struct file_extent_hole, node);
541
542         if (hole1->start > hole2->start)
543                 return -1;
544         if (hole1->start < hole2->start)
545                 return 1;
546         /* Now hole1->start == hole2->start */
547         if (hole1->len >= hole2->len)
548                 /*
549                  * Hole 1 will be merge center
550                  * Same hole will be merged later
551                  */
552                 return -1;
553         /* Hole 2 will be merge center */
554         return 1;
555 }
556
557 /*
558  * Add a hole to the record
559  *
560  * This will do hole merge for copy_file_extent_holes(),
561  * which will ensure there won't be continuous holes.
562  */
563 static int add_file_extent_hole(struct rb_root *holes,
564                                 u64 start, u64 len)
565 {
566         struct file_extent_hole *hole;
567         struct file_extent_hole *prev = NULL;
568         struct file_extent_hole *next = NULL;
569
570         hole = malloc(sizeof(*hole));
571         if (!hole)
572                 return -ENOMEM;
573         hole->start = start;
574         hole->len = len;
575         /* Since compare will not return 0, no -EEXIST will happen */
576         rb_insert(holes, &hole->node, compare_hole);
577
578         /* simple merge with previous hole */
579         if (rb_prev(&hole->node))
580                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
581                                 node);
582         if (prev && prev->start + prev->len >= hole->start) {
583                 hole->len = hole->start + hole->len - prev->start;
584                 hole->start = prev->start;
585                 rb_erase(&prev->node, holes);
586                 free(prev);
587                 prev = NULL;
588         }
589
590         /* iterate merge with next holes */
591         while (1) {
592                 if (!rb_next(&hole->node))
593                         break;
594                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
595                                         node);
596                 if (hole->start + hole->len >= next->start) {
597                         if (hole->start + hole->len <= next->start + next->len)
598                                 hole->len = next->start + next->len -
599                                             hole->start;
600                         rb_erase(&next->node, holes);
601                         free(next);
602                         next = NULL;
603                 } else
604                         break;
605         }
606         return 0;
607 }
608
609 static int compare_hole_range(struct rb_node *node, void *data)
610 {
611         struct file_extent_hole *hole;
612         u64 start;
613
614         hole = (struct file_extent_hole *)data;
615         start = hole->start;
616
617         hole = rb_entry(node, struct file_extent_hole, node);
618         if (start < hole->start)
619                 return -1;
620         if (start >= hole->start && start < hole->start + hole->len)
621                 return 0;
622         return 1;
623 }
624
625 /*
626  * Delete a hole in the record
627  *
628  * This will do the hole split and is much restrict than add.
629  */
630 static int del_file_extent_hole(struct rb_root *holes,
631                                 u64 start, u64 len)
632 {
633         struct file_extent_hole *hole;
634         struct file_extent_hole tmp;
635         u64 prev_start = 0;
636         u64 prev_len = 0;
637         u64 next_start = 0;
638         u64 next_len = 0;
639         struct rb_node *node;
640         int have_prev = 0;
641         int have_next = 0;
642         int ret = 0;
643
644         tmp.start = start;
645         tmp.len = len;
646         node = rb_search(holes, &tmp, compare_hole_range, NULL);
647         if (!node)
648                 return -EEXIST;
649         hole = rb_entry(node, struct file_extent_hole, node);
650         if (start + len > hole->start + hole->len)
651                 return -EEXIST;
652
653         /*
654          * Now there will be no overlap, delete the hole and re-add the
655          * split(s) if they exists.
656          */
657         if (start > hole->start) {
658                 prev_start = hole->start;
659                 prev_len = start - hole->start;
660                 have_prev = 1;
661         }
662         if (hole->start + hole->len > start + len) {
663                 next_start = start + len;
664                 next_len = hole->start + hole->len - start - len;
665                 have_next = 1;
666         }
667         rb_erase(node, holes);
668         free(hole);
669         if (have_prev) {
670                 ret = add_file_extent_hole(holes, prev_start, prev_len);
671                 if (ret < 0)
672                         return ret;
673         }
674         if (have_next) {
675                 ret = add_file_extent_hole(holes, next_start, next_len);
676                 if (ret < 0)
677                         return ret;
678         }
679         return 0;
680 }
681
682 static int copy_file_extent_holes(struct rb_root *dst,
683                                   struct rb_root *src)
684 {
685         struct file_extent_hole *hole;
686         struct rb_node *node;
687         int ret = 0;
688
689         node = rb_first(src);
690         while (node) {
691                 hole = rb_entry(node, struct file_extent_hole, node);
692                 ret = add_file_extent_hole(dst, hole->start, hole->len);
693                 if (ret)
694                         break;
695                 node = rb_next(node);
696         }
697         return ret;
698 }
699
700 static void free_file_extent_holes(struct rb_root *holes)
701 {
702         struct rb_node *node;
703         struct file_extent_hole *hole;
704
705         node = rb_first(holes);
706         while (node) {
707                 hole = rb_entry(node, struct file_extent_hole, node);
708                 rb_erase(node, holes);
709                 free(hole);
710                 node = rb_first(holes);
711         }
712 }
713
714 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
715
716 static void record_root_in_trans(struct btrfs_trans_handle *trans,
717                                  struct btrfs_root *root)
718 {
719         if (root->last_trans != trans->transid) {
720                 root->track_dirty = 1;
721                 root->last_trans = trans->transid;
722                 root->commit_root = root->node;
723                 extent_buffer_get(root->node);
724         }
725 }
726
727 static u8 imode_to_type(u32 imode)
728 {
729 #define S_SHIFT 12
730         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
731                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
732                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
733                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
734                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
735                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
736                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
737                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
738         };
739
740         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
741 #undef S_SHIFT
742 }
743
744 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
745 {
746         struct device_record *rec1;
747         struct device_record *rec2;
748
749         rec1 = rb_entry(node1, struct device_record, node);
750         rec2 = rb_entry(node2, struct device_record, node);
751         if (rec1->devid > rec2->devid)
752                 return -1;
753         else if (rec1->devid < rec2->devid)
754                 return 1;
755         else
756                 return 0;
757 }
758
759 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
760 {
761         struct inode_record *rec;
762         struct inode_backref *backref;
763         struct inode_backref *orig;
764         struct inode_backref *tmp;
765         struct orphan_data_extent *src_orphan;
766         struct orphan_data_extent *dst_orphan;
767         struct rb_node *rb;
768         size_t size;
769         int ret;
770
771         rec = malloc(sizeof(*rec));
772         if (!rec)
773                 return ERR_PTR(-ENOMEM);
774         memcpy(rec, orig_rec, sizeof(*rec));
775         rec->refs = 1;
776         INIT_LIST_HEAD(&rec->backrefs);
777         INIT_LIST_HEAD(&rec->orphan_extents);
778         rec->holes = RB_ROOT;
779
780         list_for_each_entry(orig, &orig_rec->backrefs, list) {
781                 size = sizeof(*orig) + orig->namelen + 1;
782                 backref = malloc(size);
783                 if (!backref) {
784                         ret = -ENOMEM;
785                         goto cleanup;
786                 }
787                 memcpy(backref, orig, size);
788                 list_add_tail(&backref->list, &rec->backrefs);
789         }
790         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
791                 dst_orphan = malloc(sizeof(*dst_orphan));
792                 if (!dst_orphan) {
793                         ret = -ENOMEM;
794                         goto cleanup;
795                 }
796                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
797                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
798         }
799         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
800         if (ret < 0)
801                 goto cleanup_rb;
802
803         return rec;
804
805 cleanup_rb:
806         rb = rb_first(&rec->holes);
807         while (rb) {
808                 struct file_extent_hole *hole;
809
810                 hole = rb_entry(rb, struct file_extent_hole, node);
811                 rb = rb_next(rb);
812                 free(hole);
813         }
814
815 cleanup:
816         if (!list_empty(&rec->backrefs))
817                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
818                         list_del(&orig->list);
819                         free(orig);
820                 }
821
822         if (!list_empty(&rec->orphan_extents))
823                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
824                         list_del(&orig->list);
825                         free(orig);
826                 }
827
828         free(rec);
829
830         return ERR_PTR(ret);
831 }
832
833 static void print_orphan_data_extents(struct list_head *orphan_extents,
834                                       u64 objectid)
835 {
836         struct orphan_data_extent *orphan;
837
838         if (list_empty(orphan_extents))
839                 return;
840         printf("The following data extent is lost in tree %llu:\n",
841                objectid);
842         list_for_each_entry(orphan, orphan_extents, list) {
843                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
844                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
845                        orphan->disk_len);
846         }
847 }
848
849 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
850 {
851         u64 root_objectid = root->root_key.objectid;
852         int errors = rec->errors;
853
854         if (!errors)
855                 return;
856         /* reloc root errors, we print its corresponding fs root objectid*/
857         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
858                 root_objectid = root->root_key.offset;
859                 fprintf(stderr, "reloc");
860         }
861         fprintf(stderr, "root %llu inode %llu errors %x",
862                 (unsigned long long) root_objectid,
863                 (unsigned long long) rec->ino, rec->errors);
864
865         if (errors & I_ERR_NO_INODE_ITEM)
866                 fprintf(stderr, ", no inode item");
867         if (errors & I_ERR_NO_ORPHAN_ITEM)
868                 fprintf(stderr, ", no orphan item");
869         if (errors & I_ERR_DUP_INODE_ITEM)
870                 fprintf(stderr, ", dup inode item");
871         if (errors & I_ERR_DUP_DIR_INDEX)
872                 fprintf(stderr, ", dup dir index");
873         if (errors & I_ERR_ODD_DIR_ITEM)
874                 fprintf(stderr, ", odd dir item");
875         if (errors & I_ERR_ODD_FILE_EXTENT)
876                 fprintf(stderr, ", odd file extent");
877         if (errors & I_ERR_BAD_FILE_EXTENT)
878                 fprintf(stderr, ", bad file extent");
879         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
880                 fprintf(stderr, ", file extent overlap");
881         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
882                 fprintf(stderr, ", file extent discount");
883         if (errors & I_ERR_DIR_ISIZE_WRONG)
884                 fprintf(stderr, ", dir isize wrong");
885         if (errors & I_ERR_FILE_NBYTES_WRONG)
886                 fprintf(stderr, ", nbytes wrong");
887         if (errors & I_ERR_ODD_CSUM_ITEM)
888                 fprintf(stderr, ", odd csum item");
889         if (errors & I_ERR_SOME_CSUM_MISSING)
890                 fprintf(stderr, ", some csum missing");
891         if (errors & I_ERR_LINK_COUNT_WRONG)
892                 fprintf(stderr, ", link count wrong");
893         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
894                 fprintf(stderr, ", orphan file extent");
895         fprintf(stderr, "\n");
896         /* Print the orphan extents if needed */
897         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
899
900         /* Print the holes if needed */
901         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
902                 struct file_extent_hole *hole;
903                 struct rb_node *node;
904                 int found = 0;
905
906                 node = rb_first(&rec->holes);
907                 fprintf(stderr, "Found file extent holes:\n");
908                 while (node) {
909                         found = 1;
910                         hole = rb_entry(node, struct file_extent_hole, node);
911                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
912                                 hole->start, hole->len);
913                         node = rb_next(node);
914                 }
915                 if (!found)
916                         fprintf(stderr, "\tstart: 0, len: %llu\n",
917                                 round_up(rec->isize,
918                                          root->fs_info->sectorsize));
919         }
920 }
921
922 static void print_ref_error(int errors)
923 {
924         if (errors & REF_ERR_NO_DIR_ITEM)
925                 fprintf(stderr, ", no dir item");
926         if (errors & REF_ERR_NO_DIR_INDEX)
927                 fprintf(stderr, ", no dir index");
928         if (errors & REF_ERR_NO_INODE_REF)
929                 fprintf(stderr, ", no inode ref");
930         if (errors & REF_ERR_DUP_DIR_ITEM)
931                 fprintf(stderr, ", dup dir item");
932         if (errors & REF_ERR_DUP_DIR_INDEX)
933                 fprintf(stderr, ", dup dir index");
934         if (errors & REF_ERR_DUP_INODE_REF)
935                 fprintf(stderr, ", dup inode ref");
936         if (errors & REF_ERR_INDEX_UNMATCH)
937                 fprintf(stderr, ", index mismatch");
938         if (errors & REF_ERR_FILETYPE_UNMATCH)
939                 fprintf(stderr, ", filetype mismatch");
940         if (errors & REF_ERR_NAME_TOO_LONG)
941                 fprintf(stderr, ", name too long");
942         if (errors & REF_ERR_NO_ROOT_REF)
943                 fprintf(stderr, ", no root ref");
944         if (errors & REF_ERR_NO_ROOT_BACKREF)
945                 fprintf(stderr, ", no root backref");
946         if (errors & REF_ERR_DUP_ROOT_REF)
947                 fprintf(stderr, ", dup root ref");
948         if (errors & REF_ERR_DUP_ROOT_BACKREF)
949                 fprintf(stderr, ", dup root backref");
950         fprintf(stderr, "\n");
951 }
952
953 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
954                                           u64 ino, int mod)
955 {
956         struct ptr_node *node;
957         struct cache_extent *cache;
958         struct inode_record *rec = NULL;
959         int ret;
960
961         cache = lookup_cache_extent(inode_cache, ino, 1);
962         if (cache) {
963                 node = container_of(cache, struct ptr_node, cache);
964                 rec = node->data;
965                 if (mod && rec->refs > 1) {
966                         node->data = clone_inode_rec(rec);
967                         if (IS_ERR(node->data))
968                                 return node->data;
969                         rec->refs--;
970                         rec = node->data;
971                 }
972         } else if (mod) {
973                 rec = calloc(1, sizeof(*rec));
974                 if (!rec)
975                         return ERR_PTR(-ENOMEM);
976                 rec->ino = ino;
977                 rec->extent_start = (u64)-1;
978                 rec->refs = 1;
979                 INIT_LIST_HEAD(&rec->backrefs);
980                 INIT_LIST_HEAD(&rec->orphan_extents);
981                 rec->holes = RB_ROOT;
982
983                 node = malloc(sizeof(*node));
984                 if (!node) {
985                         free(rec);
986                         return ERR_PTR(-ENOMEM);
987                 }
988                 node->cache.start = ino;
989                 node->cache.size = 1;
990                 node->data = rec;
991
992                 if (ino == BTRFS_FREE_INO_OBJECTID)
993                         rec->found_link = 1;
994
995                 ret = insert_cache_extent(inode_cache, &node->cache);
996                 if (ret)
997                         return ERR_PTR(-EEXIST);
998         }
999         return rec;
1000 }
1001
1002 static void free_orphan_data_extents(struct list_head *orphan_extents)
1003 {
1004         struct orphan_data_extent *orphan;
1005
1006         while (!list_empty(orphan_extents)) {
1007                 orphan = list_entry(orphan_extents->next,
1008                                     struct orphan_data_extent, list);
1009                 list_del(&orphan->list);
1010                 free(orphan);
1011         }
1012 }
1013
1014 static void free_inode_rec(struct inode_record *rec)
1015 {
1016         struct inode_backref *backref;
1017
1018         if (--rec->refs > 0)
1019                 return;
1020
1021         while (!list_empty(&rec->backrefs)) {
1022                 backref = to_inode_backref(rec->backrefs.next);
1023                 list_del(&backref->list);
1024                 free(backref);
1025         }
1026         free_orphan_data_extents(&rec->orphan_extents);
1027         free_file_extent_holes(&rec->holes);
1028         free(rec);
1029 }
1030
1031 static int can_free_inode_rec(struct inode_record *rec)
1032 {
1033         if (!rec->errors && rec->checked && rec->found_inode_item &&
1034             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1035                 return 1;
1036         return 0;
1037 }
1038
1039 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1040                                  struct inode_record *rec)
1041 {
1042         struct cache_extent *cache;
1043         struct inode_backref *tmp, *backref;
1044         struct ptr_node *node;
1045         u8 filetype;
1046
1047         if (!rec->found_inode_item)
1048                 return;
1049
1050         filetype = imode_to_type(rec->imode);
1051         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1052                 if (backref->found_dir_item && backref->found_dir_index) {
1053                         if (backref->filetype != filetype)
1054                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1055                         if (!backref->errors && backref->found_inode_ref &&
1056                             rec->nlink == rec->found_link) {
1057                                 list_del(&backref->list);
1058                                 free(backref);
1059                         }
1060                 }
1061         }
1062
1063         if (!rec->checked || rec->merging)
1064                 return;
1065
1066         if (S_ISDIR(rec->imode)) {
1067                 if (rec->found_size != rec->isize)
1068                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1069                 if (rec->found_file_extent)
1070                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1071         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1072                 if (rec->found_dir_item)
1073                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1074                 if (rec->found_size != rec->nbytes)
1075                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1076                 if (rec->nlink > 0 && !no_holes &&
1077                     (rec->extent_end < rec->isize ||
1078                      first_extent_gap(&rec->holes) < rec->isize))
1079                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1080         }
1081
1082         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1083                 if (rec->found_csum_item && rec->nodatasum)
1084                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1085                 if (rec->some_csum_missing && !rec->nodatasum)
1086                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1087         }
1088
1089         BUG_ON(rec->refs != 1);
1090         if (can_free_inode_rec(rec)) {
1091                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1092                 node = container_of(cache, struct ptr_node, cache);
1093                 BUG_ON(node->data != rec);
1094                 remove_cache_extent(inode_cache, &node->cache);
1095                 free(node);
1096                 free_inode_rec(rec);
1097         }
1098 }
1099
1100 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1101 {
1102         struct btrfs_path path;
1103         struct btrfs_key key;
1104         int ret;
1105
1106         key.objectid = BTRFS_ORPHAN_OBJECTID;
1107         key.type = BTRFS_ORPHAN_ITEM_KEY;
1108         key.offset = ino;
1109
1110         btrfs_init_path(&path);
1111         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1112         btrfs_release_path(&path);
1113         if (ret > 0)
1114                 ret = -ENOENT;
1115         return ret;
1116 }
1117
1118 static int process_inode_item(struct extent_buffer *eb,
1119                               int slot, struct btrfs_key *key,
1120                               struct shared_node *active_node)
1121 {
1122         struct inode_record *rec;
1123         struct btrfs_inode_item *item;
1124
1125         rec = active_node->current;
1126         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1127         if (rec->found_inode_item) {
1128                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1129                 return 1;
1130         }
1131         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1132         rec->nlink = btrfs_inode_nlink(eb, item);
1133         rec->isize = btrfs_inode_size(eb, item);
1134         rec->nbytes = btrfs_inode_nbytes(eb, item);
1135         rec->imode = btrfs_inode_mode(eb, item);
1136         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1137                 rec->nodatasum = 1;
1138         rec->found_inode_item = 1;
1139         if (rec->nlink == 0)
1140                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1141         maybe_free_inode_rec(&active_node->inode_cache, rec);
1142         return 0;
1143 }
1144
1145 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1146                                                 const char *name,
1147                                                 int namelen, u64 dir)
1148 {
1149         struct inode_backref *backref;
1150
1151         list_for_each_entry(backref, &rec->backrefs, list) {
1152                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1153                         break;
1154                 if (backref->dir != dir || backref->namelen != namelen)
1155                         continue;
1156                 if (memcmp(name, backref->name, namelen))
1157                         continue;
1158                 return backref;
1159         }
1160
1161         backref = malloc(sizeof(*backref) + namelen + 1);
1162         if (!backref)
1163                 return NULL;
1164         memset(backref, 0, sizeof(*backref));
1165         backref->dir = dir;
1166         backref->namelen = namelen;
1167         memcpy(backref->name, name, namelen);
1168         backref->name[namelen] = '\0';
1169         list_add_tail(&backref->list, &rec->backrefs);
1170         return backref;
1171 }
1172
1173 static int add_inode_backref(struct cache_tree *inode_cache,
1174                              u64 ino, u64 dir, u64 index,
1175                              const char *name, int namelen,
1176                              u8 filetype, u8 itemtype, int errors)
1177 {
1178         struct inode_record *rec;
1179         struct inode_backref *backref;
1180
1181         rec = get_inode_rec(inode_cache, ino, 1);
1182         BUG_ON(IS_ERR(rec));
1183         backref = get_inode_backref(rec, name, namelen, dir);
1184         BUG_ON(!backref);
1185         if (errors)
1186                 backref->errors |= errors;
1187         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1188                 if (backref->found_dir_index)
1189                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1190                 if (backref->found_inode_ref && backref->index != index)
1191                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1192                 if (backref->found_dir_item && backref->filetype != filetype)
1193                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1194
1195                 backref->index = index;
1196                 backref->filetype = filetype;
1197                 backref->found_dir_index = 1;
1198         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1199                 rec->found_link++;
1200                 if (backref->found_dir_item)
1201                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1202                 if (backref->found_dir_index && backref->filetype != filetype)
1203                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1204
1205                 backref->filetype = filetype;
1206                 backref->found_dir_item = 1;
1207         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1208                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1209                 if (backref->found_inode_ref)
1210                         backref->errors |= REF_ERR_DUP_INODE_REF;
1211                 if (backref->found_dir_index && backref->index != index)
1212                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1213                 else
1214                         backref->index = index;
1215
1216                 backref->ref_type = itemtype;
1217                 backref->found_inode_ref = 1;
1218         } else {
1219                 BUG_ON(1);
1220         }
1221
1222         maybe_free_inode_rec(inode_cache, rec);
1223         return 0;
1224 }
1225
1226 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1227                             struct cache_tree *dst_cache)
1228 {
1229         struct inode_backref *backref;
1230         u32 dir_count = 0;
1231         int ret = 0;
1232
1233         dst->merging = 1;
1234         list_for_each_entry(backref, &src->backrefs, list) {
1235                 if (backref->found_dir_index) {
1236                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1237                                         backref->index, backref->name,
1238                                         backref->namelen, backref->filetype,
1239                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1240                 }
1241                 if (backref->found_dir_item) {
1242                         dir_count++;
1243                         add_inode_backref(dst_cache, dst->ino,
1244                                         backref->dir, 0, backref->name,
1245                                         backref->namelen, backref->filetype,
1246                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1247                 }
1248                 if (backref->found_inode_ref) {
1249                         add_inode_backref(dst_cache, dst->ino,
1250                                         backref->dir, backref->index,
1251                                         backref->name, backref->namelen, 0,
1252                                         backref->ref_type, backref->errors);
1253                 }
1254         }
1255
1256         if (src->found_dir_item)
1257                 dst->found_dir_item = 1;
1258         if (src->found_file_extent)
1259                 dst->found_file_extent = 1;
1260         if (src->found_csum_item)
1261                 dst->found_csum_item = 1;
1262         if (src->some_csum_missing)
1263                 dst->some_csum_missing = 1;
1264         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1265                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1266                 if (ret < 0)
1267                         return ret;
1268         }
1269
1270         BUG_ON(src->found_link < dir_count);
1271         dst->found_link += src->found_link - dir_count;
1272         dst->found_size += src->found_size;
1273         if (src->extent_start != (u64)-1) {
1274                 if (dst->extent_start == (u64)-1) {
1275                         dst->extent_start = src->extent_start;
1276                         dst->extent_end = src->extent_end;
1277                 } else {
1278                         if (dst->extent_end > src->extent_start)
1279                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1280                         else if (dst->extent_end < src->extent_start) {
1281                                 ret = add_file_extent_hole(&dst->holes,
1282                                         dst->extent_end,
1283                                         src->extent_start - dst->extent_end);
1284                         }
1285                         if (dst->extent_end < src->extent_end)
1286                                 dst->extent_end = src->extent_end;
1287                 }
1288         }
1289
1290         dst->errors |= src->errors;
1291         if (src->found_inode_item) {
1292                 if (!dst->found_inode_item) {
1293                         dst->nlink = src->nlink;
1294                         dst->isize = src->isize;
1295                         dst->nbytes = src->nbytes;
1296                         dst->imode = src->imode;
1297                         dst->nodatasum = src->nodatasum;
1298                         dst->found_inode_item = 1;
1299                 } else {
1300                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1301                 }
1302         }
1303         dst->merging = 0;
1304
1305         return 0;
1306 }
1307
1308 static int splice_shared_node(struct shared_node *src_node,
1309                               struct shared_node *dst_node)
1310 {
1311         struct cache_extent *cache;
1312         struct ptr_node *node, *ins;
1313         struct cache_tree *src, *dst;
1314         struct inode_record *rec, *conflict;
1315         u64 current_ino = 0;
1316         int splice = 0;
1317         int ret;
1318
1319         if (--src_node->refs == 0)
1320                 splice = 1;
1321         if (src_node->current)
1322                 current_ino = src_node->current->ino;
1323
1324         src = &src_node->root_cache;
1325         dst = &dst_node->root_cache;
1326 again:
1327         cache = search_cache_extent(src, 0);
1328         while (cache) {
1329                 node = container_of(cache, struct ptr_node, cache);
1330                 rec = node->data;
1331                 cache = next_cache_extent(cache);
1332
1333                 if (splice) {
1334                         remove_cache_extent(src, &node->cache);
1335                         ins = node;
1336                 } else {
1337                         ins = malloc(sizeof(*ins));
1338                         BUG_ON(!ins);
1339                         ins->cache.start = node->cache.start;
1340                         ins->cache.size = node->cache.size;
1341                         ins->data = rec;
1342                         rec->refs++;
1343                 }
1344                 ret = insert_cache_extent(dst, &ins->cache);
1345                 if (ret == -EEXIST) {
1346                         conflict = get_inode_rec(dst, rec->ino, 1);
1347                         BUG_ON(IS_ERR(conflict));
1348                         merge_inode_recs(rec, conflict, dst);
1349                         if (rec->checked) {
1350                                 conflict->checked = 1;
1351                                 if (dst_node->current == conflict)
1352                                         dst_node->current = NULL;
1353                         }
1354                         maybe_free_inode_rec(dst, conflict);
1355                         free_inode_rec(rec);
1356                         free(ins);
1357                 } else {
1358                         BUG_ON(ret);
1359                 }
1360         }
1361
1362         if (src == &src_node->root_cache) {
1363                 src = &src_node->inode_cache;
1364                 dst = &dst_node->inode_cache;
1365                 goto again;
1366         }
1367
1368         if (current_ino > 0 && (!dst_node->current ||
1369             current_ino > dst_node->current->ino)) {
1370                 if (dst_node->current) {
1371                         dst_node->current->checked = 1;
1372                         maybe_free_inode_rec(dst, dst_node->current);
1373                 }
1374                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1375                 BUG_ON(IS_ERR(dst_node->current));
1376         }
1377         return 0;
1378 }
1379
1380 static void free_inode_ptr(struct cache_extent *cache)
1381 {
1382         struct ptr_node *node;
1383         struct inode_record *rec;
1384
1385         node = container_of(cache, struct ptr_node, cache);
1386         rec = node->data;
1387         free_inode_rec(rec);
1388         free(node);
1389 }
1390
1391 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1392
1393 static struct shared_node *find_shared_node(struct cache_tree *shared,
1394                                             u64 bytenr)
1395 {
1396         struct cache_extent *cache;
1397         struct shared_node *node;
1398
1399         cache = lookup_cache_extent(shared, bytenr, 1);
1400         if (cache) {
1401                 node = container_of(cache, struct shared_node, cache);
1402                 return node;
1403         }
1404         return NULL;
1405 }
1406
1407 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1408 {
1409         int ret;
1410         struct shared_node *node;
1411
1412         node = calloc(1, sizeof(*node));
1413         if (!node)
1414                 return -ENOMEM;
1415         node->cache.start = bytenr;
1416         node->cache.size = 1;
1417         cache_tree_init(&node->root_cache);
1418         cache_tree_init(&node->inode_cache);
1419         node->refs = refs;
1420
1421         ret = insert_cache_extent(shared, &node->cache);
1422
1423         return ret;
1424 }
1425
1426 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1427                              struct walk_control *wc, int level)
1428 {
1429         struct shared_node *node;
1430         struct shared_node *dest;
1431         int ret;
1432
1433         if (level == wc->active_node)
1434                 return 0;
1435
1436         BUG_ON(wc->active_node <= level);
1437         node = find_shared_node(&wc->shared, bytenr);
1438         if (!node) {
1439                 ret = add_shared_node(&wc->shared, bytenr, refs);
1440                 BUG_ON(ret);
1441                 node = find_shared_node(&wc->shared, bytenr);
1442                 wc->nodes[level] = node;
1443                 wc->active_node = level;
1444                 return 0;
1445         }
1446
1447         if (wc->root_level == wc->active_node &&
1448             btrfs_root_refs(&root->root_item) == 0) {
1449                 if (--node->refs == 0) {
1450                         free_inode_recs_tree(&node->root_cache);
1451                         free_inode_recs_tree(&node->inode_cache);
1452                         remove_cache_extent(&wc->shared, &node->cache);
1453                         free(node);
1454                 }
1455                 return 1;
1456         }
1457
1458         dest = wc->nodes[wc->active_node];
1459         splice_shared_node(node, dest);
1460         if (node->refs == 0) {
1461                 remove_cache_extent(&wc->shared, &node->cache);
1462                 free(node);
1463         }
1464         return 1;
1465 }
1466
1467 static int leave_shared_node(struct btrfs_root *root,
1468                              struct walk_control *wc, int level)
1469 {
1470         struct shared_node *node;
1471         struct shared_node *dest;
1472         int i;
1473
1474         if (level == wc->root_level)
1475                 return 0;
1476
1477         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1478                 if (wc->nodes[i])
1479                         break;
1480         }
1481         BUG_ON(i >= BTRFS_MAX_LEVEL);
1482
1483         node = wc->nodes[wc->active_node];
1484         wc->nodes[wc->active_node] = NULL;
1485         wc->active_node = i;
1486
1487         dest = wc->nodes[wc->active_node];
1488         if (wc->active_node < wc->root_level ||
1489             btrfs_root_refs(&root->root_item) > 0) {
1490                 BUG_ON(node->refs <= 1);
1491                 splice_shared_node(node, dest);
1492         } else {
1493                 BUG_ON(node->refs < 2);
1494                 node->refs--;
1495         }
1496         return 0;
1497 }
1498
1499 /*
1500  * Returns:
1501  * < 0 - on error
1502  * 1   - if the root with id child_root_id is a child of root parent_root_id
1503  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1504  *       has other root(s) as parent(s)
1505  * 2   - if the root child_root_id doesn't have any parent roots
1506  */
1507 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1508                          u64 child_root_id)
1509 {
1510         struct btrfs_path path;
1511         struct btrfs_key key;
1512         struct extent_buffer *leaf;
1513         int has_parent = 0;
1514         int ret;
1515
1516         btrfs_init_path(&path);
1517
1518         key.objectid = parent_root_id;
1519         key.type = BTRFS_ROOT_REF_KEY;
1520         key.offset = child_root_id;
1521         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1522                                 0, 0);
1523         if (ret < 0)
1524                 return ret;
1525         btrfs_release_path(&path);
1526         if (!ret)
1527                 return 1;
1528
1529         key.objectid = child_root_id;
1530         key.type = BTRFS_ROOT_BACKREF_KEY;
1531         key.offset = 0;
1532         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1533                                 0, 0);
1534         if (ret < 0)
1535                 goto out;
1536
1537         while (1) {
1538                 leaf = path.nodes[0];
1539                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1540                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1541                         if (ret)
1542                                 break;
1543                         leaf = path.nodes[0];
1544                 }
1545
1546                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1547                 if (key.objectid != child_root_id ||
1548                     key.type != BTRFS_ROOT_BACKREF_KEY)
1549                         break;
1550
1551                 has_parent = 1;
1552
1553                 if (key.offset == parent_root_id) {
1554                         btrfs_release_path(&path);
1555                         return 1;
1556                 }
1557
1558                 path.slots[0]++;
1559         }
1560 out:
1561         btrfs_release_path(&path);
1562         if (ret < 0)
1563                 return ret;
1564         return has_parent ? 0 : 2;
1565 }
1566
1567 static int process_dir_item(struct extent_buffer *eb,
1568                             int slot, struct btrfs_key *key,
1569                             struct shared_node *active_node)
1570 {
1571         u32 total;
1572         u32 cur = 0;
1573         u32 len;
1574         u32 name_len;
1575         u32 data_len;
1576         int error;
1577         int nritems = 0;
1578         u8 filetype;
1579         struct btrfs_dir_item *di;
1580         struct inode_record *rec;
1581         struct cache_tree *root_cache;
1582         struct cache_tree *inode_cache;
1583         struct btrfs_key location;
1584         char namebuf[BTRFS_NAME_LEN];
1585
1586         root_cache = &active_node->root_cache;
1587         inode_cache = &active_node->inode_cache;
1588         rec = active_node->current;
1589         rec->found_dir_item = 1;
1590
1591         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1592         total = btrfs_item_size_nr(eb, slot);
1593         while (cur < total) {
1594                 nritems++;
1595                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1596                 name_len = btrfs_dir_name_len(eb, di);
1597                 data_len = btrfs_dir_data_len(eb, di);
1598                 filetype = btrfs_dir_type(eb, di);
1599
1600                 rec->found_size += name_len;
1601                 if (cur + sizeof(*di) + name_len > total ||
1602                     name_len > BTRFS_NAME_LEN) {
1603                         error = REF_ERR_NAME_TOO_LONG;
1604
1605                         if (cur + sizeof(*di) > total)
1606                                 break;
1607                         len = min_t(u32, total - cur - sizeof(*di),
1608                                     BTRFS_NAME_LEN);
1609                 } else {
1610                         len = name_len;
1611                         error = 0;
1612                 }
1613
1614                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1615
1616                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1617                     key->offset != btrfs_name_hash(namebuf, len)) {
1618                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1619                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1620                         key->objectid, key->offset, namebuf, len, filetype,
1621                         key->offset, btrfs_name_hash(namebuf, len));
1622                 }
1623
1624                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1625                         add_inode_backref(inode_cache, location.objectid,
1626                                           key->objectid, key->offset, namebuf,
1627                                           len, filetype, key->type, error);
1628                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1629                         add_inode_backref(root_cache, location.objectid,
1630                                           key->objectid, key->offset,
1631                                           namebuf, len, filetype,
1632                                           key->type, error);
1633                 } else {
1634                         fprintf(stderr, "invalid location in dir item %u\n",
1635                                 location.type);
1636                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1637                                           key->objectid, key->offset, namebuf,
1638                                           len, filetype, key->type, error);
1639                 }
1640
1641                 len = sizeof(*di) + name_len + data_len;
1642                 di = (struct btrfs_dir_item *)((char *)di + len);
1643                 cur += len;
1644         }
1645         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1646                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1647
1648         return 0;
1649 }
1650
1651 static int process_inode_ref(struct extent_buffer *eb,
1652                              int slot, struct btrfs_key *key,
1653                              struct shared_node *active_node)
1654 {
1655         u32 total;
1656         u32 cur = 0;
1657         u32 len;
1658         u32 name_len;
1659         u64 index;
1660         int error;
1661         struct cache_tree *inode_cache;
1662         struct btrfs_inode_ref *ref;
1663         char namebuf[BTRFS_NAME_LEN];
1664
1665         inode_cache = &active_node->inode_cache;
1666
1667         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1668         total = btrfs_item_size_nr(eb, slot);
1669         while (cur < total) {
1670                 name_len = btrfs_inode_ref_name_len(eb, ref);
1671                 index = btrfs_inode_ref_index(eb, ref);
1672
1673                 /* inode_ref + namelen should not cross item boundary */
1674                 if (cur + sizeof(*ref) + name_len > total ||
1675                     name_len > BTRFS_NAME_LEN) {
1676                         if (total < cur + sizeof(*ref))
1677                                 break;
1678
1679                         /* Still try to read out the remaining part */
1680                         len = min_t(u32, total - cur - sizeof(*ref),
1681                                     BTRFS_NAME_LEN);
1682                         error = REF_ERR_NAME_TOO_LONG;
1683                 } else {
1684                         len = name_len;
1685                         error = 0;
1686                 }
1687
1688                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1689                 add_inode_backref(inode_cache, key->objectid, key->offset,
1690                                   index, namebuf, len, 0, key->type, error);
1691
1692                 len = sizeof(*ref) + name_len;
1693                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1694                 cur += len;
1695         }
1696         return 0;
1697 }
1698
1699 static int process_inode_extref(struct extent_buffer *eb,
1700                                 int slot, struct btrfs_key *key,
1701                                 struct shared_node *active_node)
1702 {
1703         u32 total;
1704         u32 cur = 0;
1705         u32 len;
1706         u32 name_len;
1707         u64 index;
1708         u64 parent;
1709         int error;
1710         struct cache_tree *inode_cache;
1711         struct btrfs_inode_extref *extref;
1712         char namebuf[BTRFS_NAME_LEN];
1713
1714         inode_cache = &active_node->inode_cache;
1715
1716         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1717         total = btrfs_item_size_nr(eb, slot);
1718         while (cur < total) {
1719                 name_len = btrfs_inode_extref_name_len(eb, extref);
1720                 index = btrfs_inode_extref_index(eb, extref);
1721                 parent = btrfs_inode_extref_parent(eb, extref);
1722                 if (name_len <= BTRFS_NAME_LEN) {
1723                         len = name_len;
1724                         error = 0;
1725                 } else {
1726                         len = BTRFS_NAME_LEN;
1727                         error = REF_ERR_NAME_TOO_LONG;
1728                 }
1729                 read_extent_buffer(eb, namebuf,
1730                                    (unsigned long)(extref + 1), len);
1731                 add_inode_backref(inode_cache, key->objectid, parent,
1732                                   index, namebuf, len, 0, key->type, error);
1733
1734                 len = sizeof(*extref) + name_len;
1735                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1736                 cur += len;
1737         }
1738         return 0;
1739
1740 }
1741
1742 static int count_csum_range(struct btrfs_root *root, u64 start,
1743                             u64 len, u64 *found)
1744 {
1745         struct btrfs_key key;
1746         struct btrfs_path path;
1747         struct extent_buffer *leaf;
1748         int ret;
1749         size_t size;
1750         *found = 0;
1751         u64 csum_end;
1752         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1753
1754         btrfs_init_path(&path);
1755
1756         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1757         key.offset = start;
1758         key.type = BTRFS_EXTENT_CSUM_KEY;
1759
1760         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1761                                 &key, &path, 0, 0);
1762         if (ret < 0)
1763                 goto out;
1764         if (ret > 0 && path.slots[0] > 0) {
1765                 leaf = path.nodes[0];
1766                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1767                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1768                     key.type == BTRFS_EXTENT_CSUM_KEY)
1769                         path.slots[0]--;
1770         }
1771
1772         while (len > 0) {
1773                 leaf = path.nodes[0];
1774                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1775                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1776                         if (ret > 0)
1777                                 break;
1778                         else if (ret < 0)
1779                                 goto out;
1780                         leaf = path.nodes[0];
1781                 }
1782
1783                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1784                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1785                     key.type != BTRFS_EXTENT_CSUM_KEY)
1786                         break;
1787
1788                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1789                 if (key.offset >= start + len)
1790                         break;
1791
1792                 if (key.offset > start)
1793                         start = key.offset;
1794
1795                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1796                 csum_end = key.offset + (size / csum_size) *
1797                            root->fs_info->sectorsize;
1798                 if (csum_end > start) {
1799                         size = min(csum_end - start, len);
1800                         len -= size;
1801                         start += size;
1802                         *found += size;
1803                 }
1804
1805                 path.slots[0]++;
1806         }
1807 out:
1808         btrfs_release_path(&path);
1809         if (ret < 0)
1810                 return ret;
1811         return 0;
1812 }
1813
1814 static int process_file_extent(struct btrfs_root *root,
1815                                 struct extent_buffer *eb,
1816                                 int slot, struct btrfs_key *key,
1817                                 struct shared_node *active_node)
1818 {
1819         struct inode_record *rec;
1820         struct btrfs_file_extent_item *fi;
1821         u64 num_bytes = 0;
1822         u64 disk_bytenr = 0;
1823         u64 extent_offset = 0;
1824         u64 mask = root->fs_info->sectorsize - 1;
1825         int extent_type;
1826         int ret;
1827
1828         rec = active_node->current;
1829         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1830         rec->found_file_extent = 1;
1831
1832         if (rec->extent_start == (u64)-1) {
1833                 rec->extent_start = key->offset;
1834                 rec->extent_end = key->offset;
1835         }
1836
1837         if (rec->extent_end > key->offset)
1838                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1839         else if (rec->extent_end < key->offset) {
1840                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1841                                            key->offset - rec->extent_end);
1842                 if (ret < 0)
1843                         return ret;
1844         }
1845
1846         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1847         extent_type = btrfs_file_extent_type(eb, fi);
1848
1849         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1850                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1851                 if (num_bytes == 0)
1852                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1853                 rec->found_size += num_bytes;
1854                 num_bytes = (num_bytes + mask) & ~mask;
1855         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1856                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1857                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1858                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1859                 extent_offset = btrfs_file_extent_offset(eb, fi);
1860                 if (num_bytes == 0 || (num_bytes & mask))
1861                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1862                 if (num_bytes + extent_offset >
1863                     btrfs_file_extent_ram_bytes(eb, fi))
1864                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1865                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1866                     (btrfs_file_extent_compression(eb, fi) ||
1867                      btrfs_file_extent_encryption(eb, fi) ||
1868                      btrfs_file_extent_other_encoding(eb, fi)))
1869                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1870                 if (disk_bytenr > 0)
1871                         rec->found_size += num_bytes;
1872         } else {
1873                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1874         }
1875         rec->extent_end = key->offset + num_bytes;
1876
1877         /*
1878          * The data reloc tree will copy full extents into its inode and then
1879          * copy the corresponding csums.  Because the extent it copied could be
1880          * a preallocated extent that hasn't been written to yet there may be no
1881          * csums to copy, ergo we won't have csums for our file extent.  This is
1882          * ok so just don't bother checking csums if the inode belongs to the
1883          * data reloc tree.
1884          */
1885         if (disk_bytenr > 0 &&
1886             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1887                 u64 found;
1888                 if (btrfs_file_extent_compression(eb, fi))
1889                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1890                 else
1891                         disk_bytenr += extent_offset;
1892
1893                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1894                 if (ret < 0)
1895                         return ret;
1896                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1897                         if (found > 0)
1898                                 rec->found_csum_item = 1;
1899                         if (found < num_bytes)
1900                                 rec->some_csum_missing = 1;
1901                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1902                         if (found > 0)
1903                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1904                 }
1905         }
1906         return 0;
1907 }
1908
1909 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1910                             struct walk_control *wc)
1911 {
1912         struct btrfs_key key;
1913         u32 nritems;
1914         int i;
1915         int ret = 0;
1916         struct cache_tree *inode_cache;
1917         struct shared_node *active_node;
1918
1919         if (wc->root_level == wc->active_node &&
1920             btrfs_root_refs(&root->root_item) == 0)
1921                 return 0;
1922
1923         active_node = wc->nodes[wc->active_node];
1924         inode_cache = &active_node->inode_cache;
1925         nritems = btrfs_header_nritems(eb);
1926         for (i = 0; i < nritems; i++) {
1927                 btrfs_item_key_to_cpu(eb, &key, i);
1928
1929                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1930                         continue;
1931                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1932                         continue;
1933
1934                 if (active_node->current == NULL ||
1935                     active_node->current->ino < key.objectid) {
1936                         if (active_node->current) {
1937                                 active_node->current->checked = 1;
1938                                 maybe_free_inode_rec(inode_cache,
1939                                                      active_node->current);
1940                         }
1941                         active_node->current = get_inode_rec(inode_cache,
1942                                                              key.objectid, 1);
1943                         BUG_ON(IS_ERR(active_node->current));
1944                 }
1945                 switch (key.type) {
1946                 case BTRFS_DIR_ITEM_KEY:
1947                 case BTRFS_DIR_INDEX_KEY:
1948                         ret = process_dir_item(eb, i, &key, active_node);
1949                         break;
1950                 case BTRFS_INODE_REF_KEY:
1951                         ret = process_inode_ref(eb, i, &key, active_node);
1952                         break;
1953                 case BTRFS_INODE_EXTREF_KEY:
1954                         ret = process_inode_extref(eb, i, &key, active_node);
1955                         break;
1956                 case BTRFS_INODE_ITEM_KEY:
1957                         ret = process_inode_item(eb, i, &key, active_node);
1958                         break;
1959                 case BTRFS_EXTENT_DATA_KEY:
1960                         ret = process_file_extent(root, eb, i, &key,
1961                                                   active_node);
1962                         break;
1963                 default:
1964                         break;
1965                 };
1966         }
1967         return ret;
1968 }
1969
1970 struct node_refs {
1971         u64 bytenr[BTRFS_MAX_LEVEL];
1972         u64 refs[BTRFS_MAX_LEVEL];
1973         int need_check[BTRFS_MAX_LEVEL];
1974 };
1975
1976 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1977                              struct node_refs *nrefs, u64 level);
1978 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1979                             unsigned int ext_ref);
1980
1981 /*
1982  * Returns >0  Found error, not fatal, should continue
1983  * Returns <0  Fatal error, must exit the whole check
1984  * Returns 0   No errors found
1985  */
1986 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1987                                struct node_refs *nrefs, int *level, int ext_ref)
1988 {
1989         struct extent_buffer *cur = path->nodes[0];
1990         struct btrfs_key key;
1991         u64 cur_bytenr;
1992         u32 nritems;
1993         u64 first_ino = 0;
1994         int root_level = btrfs_header_level(root->node);
1995         int i;
1996         int ret = 0; /* Final return value */
1997         int err = 0; /* Positive error bitmap */
1998
1999         cur_bytenr = cur->start;
2000
2001         /* skip to first inode item or the first inode number change */
2002         nritems = btrfs_header_nritems(cur);
2003         for (i = 0; i < nritems; i++) {
2004                 btrfs_item_key_to_cpu(cur, &key, i);
2005                 if (i == 0)
2006                         first_ino = key.objectid;
2007                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2008                     (first_ino && first_ino != key.objectid))
2009                         break;
2010         }
2011         if (i == nritems) {
2012                 path->slots[0] = nritems;
2013                 return 0;
2014         }
2015         path->slots[0] = i;
2016
2017 again:
2018         err |= check_inode_item(root, path, ext_ref);
2019
2020         /* modify cur since check_inode_item may change path */
2021         cur = path->nodes[0];
2022
2023         if (err & LAST_ITEM)
2024                 goto out;
2025
2026         /* still have inode items in thie leaf */
2027         if (cur->start == cur_bytenr)
2028                 goto again;
2029
2030         /*
2031          * we have switched to another leaf, above nodes may
2032          * have changed, here walk down the path, if a node
2033          * or leaf is shared, check whether we can skip this
2034          * node or leaf.
2035          */
2036         for (i = root_level; i >= 0; i--) {
2037                 if (path->nodes[i]->start == nrefs->bytenr[i])
2038                         continue;
2039
2040                 ret = update_nodes_refs(root,
2041                                 path->nodes[i]->start,
2042                                 nrefs, i);
2043                 if (ret)
2044                         goto out;
2045
2046                 if (!nrefs->need_check[i]) {
2047                         *level += 1;
2048                         break;
2049                 }
2050         }
2051
2052         for (i = 0; i < *level; i++) {
2053                 free_extent_buffer(path->nodes[i]);
2054                 path->nodes[i] = NULL;
2055         }
2056 out:
2057         err &= ~LAST_ITEM;
2058         if (err && !ret)
2059                 ret = err;
2060         return ret;
2061 }
2062
2063 static void reada_walk_down(struct btrfs_root *root,
2064                             struct extent_buffer *node, int slot)
2065 {
2066         struct btrfs_fs_info *fs_info = root->fs_info;
2067         u64 bytenr;
2068         u64 ptr_gen;
2069         u32 nritems;
2070         int i;
2071         int level;
2072
2073         level = btrfs_header_level(node);
2074         if (level != 1)
2075                 return;
2076
2077         nritems = btrfs_header_nritems(node);
2078         for (i = slot; i < nritems; i++) {
2079                 bytenr = btrfs_node_blockptr(node, i);
2080                 ptr_gen = btrfs_node_ptr_generation(node, i);
2081                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2082         }
2083 }
2084
2085 /*
2086  * Check the child node/leaf by the following condition:
2087  * 1. the first item key of the node/leaf should be the same with the one
2088  *    in parent.
2089  * 2. block in parent node should match the child node/leaf.
2090  * 3. generation of parent node and child's header should be consistent.
2091  *
2092  * Or the child node/leaf pointed by the key in parent is not valid.
2093  *
2094  * We hope to check leaf owner too, but since subvol may share leaves,
2095  * which makes leaf owner check not so strong, key check should be
2096  * sufficient enough for that case.
2097  */
2098 static int check_child_node(struct extent_buffer *parent, int slot,
2099                             struct extent_buffer *child)
2100 {
2101         struct btrfs_key parent_key;
2102         struct btrfs_key child_key;
2103         int ret = 0;
2104
2105         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2106         if (btrfs_header_level(child) == 0)
2107                 btrfs_item_key_to_cpu(child, &child_key, 0);
2108         else
2109                 btrfs_node_key_to_cpu(child, &child_key, 0);
2110
2111         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2112                 ret = -EINVAL;
2113                 fprintf(stderr,
2114                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2115                         parent_key.objectid, parent_key.type, parent_key.offset,
2116                         child_key.objectid, child_key.type, child_key.offset);
2117         }
2118         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2119                 ret = -EINVAL;
2120                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2121                         btrfs_node_blockptr(parent, slot),
2122                         btrfs_header_bytenr(child));
2123         }
2124         if (btrfs_node_ptr_generation(parent, slot) !=
2125             btrfs_header_generation(child)) {
2126                 ret = -EINVAL;
2127                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2128                         btrfs_header_generation(child),
2129                         btrfs_node_ptr_generation(parent, slot));
2130         }
2131         return ret;
2132 }
2133
2134 /*
2135  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2136  * in every fs or file tree check. Here we find its all root ids, and only check
2137  * it in the fs or file tree which has the smallest root id.
2138  */
2139 static int need_check(struct btrfs_root *root, struct ulist *roots)
2140 {
2141         struct rb_node *node;
2142         struct ulist_node *u;
2143
2144         if (roots->nnodes == 1)
2145                 return 1;
2146
2147         node = rb_first(&roots->root);
2148         u = rb_entry(node, struct ulist_node, rb_node);
2149         /*
2150          * current root id is not smallest, we skip it and let it be checked
2151          * in the fs or file tree who hash the smallest root id.
2152          */
2153         if (root->objectid != u->val)
2154                 return 0;
2155
2156         return 1;
2157 }
2158
2159 /*
2160  * for a tree node or leaf, we record its reference count, so later if we still
2161  * process this node or leaf, don't need to compute its reference count again.
2162  */
2163 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2164                              struct node_refs *nrefs, u64 level)
2165 {
2166         int check, ret;
2167         u64 refs;
2168         struct ulist *roots;
2169
2170         if (nrefs->bytenr[level] != bytenr) {
2171                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2172                                        level, 1, &refs, NULL);
2173                 if (ret < 0)
2174                         return ret;
2175
2176                 nrefs->bytenr[level] = bytenr;
2177                 nrefs->refs[level] = refs;
2178                 if (refs > 1) {
2179                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2180                                                    0, &roots);
2181                         if (ret)
2182                                 return -EIO;
2183
2184                         check = need_check(root, roots);
2185                         ulist_free(roots);
2186                         nrefs->need_check[level] = check;
2187                 } else {
2188                         nrefs->need_check[level] = 1;
2189                 }
2190         }
2191
2192         return 0;
2193 }
2194
2195 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2196                           struct walk_control *wc, int *level,
2197                           struct node_refs *nrefs)
2198 {
2199         enum btrfs_tree_block_status status;
2200         u64 bytenr;
2201         u64 ptr_gen;
2202         struct btrfs_fs_info *fs_info = root->fs_info;
2203         struct extent_buffer *next;
2204         struct extent_buffer *cur;
2205         int ret, err = 0;
2206         u64 refs;
2207
2208         WARN_ON(*level < 0);
2209         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2210
2211         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2212                 refs = nrefs->refs[*level];
2213                 ret = 0;
2214         } else {
2215                 ret = btrfs_lookup_extent_info(NULL, root,
2216                                        path->nodes[*level]->start,
2217                                        *level, 1, &refs, NULL);
2218                 if (ret < 0) {
2219                         err = ret;
2220                         goto out;
2221                 }
2222                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2223                 nrefs->refs[*level] = refs;
2224         }
2225
2226         if (refs > 1) {
2227                 ret = enter_shared_node(root, path->nodes[*level]->start,
2228                                         refs, wc, *level);
2229                 if (ret > 0) {
2230                         err = ret;
2231                         goto out;
2232                 }
2233         }
2234
2235         while (*level >= 0) {
2236                 WARN_ON(*level < 0);
2237                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2238                 cur = path->nodes[*level];
2239
2240                 if (btrfs_header_level(cur) != *level)
2241                         WARN_ON(1);
2242
2243                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2244                         break;
2245                 if (*level == 0) {
2246                         ret = process_one_leaf(root, cur, wc);
2247                         if (ret < 0)
2248                                 err = ret;
2249                         break;
2250                 }
2251                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2252                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2253
2254                 if (bytenr == nrefs->bytenr[*level - 1]) {
2255                         refs = nrefs->refs[*level - 1];
2256                 } else {
2257                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2258                                         *level - 1, 1, &refs, NULL);
2259                         if (ret < 0) {
2260                                 refs = 0;
2261                         } else {
2262                                 nrefs->bytenr[*level - 1] = bytenr;
2263                                 nrefs->refs[*level - 1] = refs;
2264                         }
2265                 }
2266
2267                 if (refs > 1) {
2268                         ret = enter_shared_node(root, bytenr, refs,
2269                                                 wc, *level - 1);
2270                         if (ret > 0) {
2271                                 path->slots[*level]++;
2272                                 continue;
2273                         }
2274                 }
2275
2276                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2277                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2278                         free_extent_buffer(next);
2279                         reada_walk_down(root, cur, path->slots[*level]);
2280                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2281                         if (!extent_buffer_uptodate(next)) {
2282                                 struct btrfs_key node_key;
2283
2284                                 btrfs_node_key_to_cpu(path->nodes[*level],
2285                                                       &node_key,
2286                                                       path->slots[*level]);
2287                                 btrfs_add_corrupt_extent_record(root->fs_info,
2288                                                 &node_key,
2289                                                 path->nodes[*level]->start,
2290                                                 root->fs_info->nodesize,
2291                                                 *level);
2292                                 err = -EIO;
2293                                 goto out;
2294                         }
2295                 }
2296
2297                 ret = check_child_node(cur, path->slots[*level], next);
2298                 if (ret) {
2299                         free_extent_buffer(next);
2300                         err = ret;
2301                         goto out;
2302                 }
2303
2304                 if (btrfs_is_leaf(next))
2305                         status = btrfs_check_leaf(root, NULL, next);
2306                 else
2307                         status = btrfs_check_node(root, NULL, next);
2308                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309                         free_extent_buffer(next);
2310                         err = -EIO;
2311                         goto out;
2312                 }
2313
2314                 *level = *level - 1;
2315                 free_extent_buffer(path->nodes[*level]);
2316                 path->nodes[*level] = next;
2317                 path->slots[*level] = 0;
2318         }
2319 out:
2320         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2321         return err;
2322 }
2323
2324 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2325                             unsigned int ext_ref);
2326
2327 /*
2328  * Returns >0  Found error, should continue
2329  * Returns <0  Fatal error, must exit the whole check
2330  * Returns 0   No errors found
2331  */
2332 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2333                              int *level, struct node_refs *nrefs, int ext_ref)
2334 {
2335         enum btrfs_tree_block_status status;
2336         u64 bytenr;
2337         u64 ptr_gen;
2338         struct btrfs_fs_info *fs_info = root->fs_info;
2339         struct extent_buffer *next;
2340         struct extent_buffer *cur;
2341         int ret;
2342
2343         WARN_ON(*level < 0);
2344         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2345
2346         ret = update_nodes_refs(root, path->nodes[*level]->start,
2347                                 nrefs, *level);
2348         if (ret < 0)
2349                 return ret;
2350
2351         while (*level >= 0) {
2352                 WARN_ON(*level < 0);
2353                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2354                 cur = path->nodes[*level];
2355
2356                 if (btrfs_header_level(cur) != *level)
2357                         WARN_ON(1);
2358
2359                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2360                         break;
2361                 /* Don't forgot to check leaf/node validation */
2362                 if (*level == 0) {
2363                         ret = btrfs_check_leaf(root, NULL, cur);
2364                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2365                                 ret = -EIO;
2366                                 break;
2367                         }
2368                         ret = process_one_leaf_v2(root, path, nrefs,
2369                                                   level, ext_ref);
2370                         cur = path->nodes[*level];
2371                         break;
2372                 } else {
2373                         ret = btrfs_check_node(root, NULL, cur);
2374                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2375                                 ret = -EIO;
2376                                 break;
2377                         }
2378                 }
2379                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2380                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2381
2382                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2383                 if (ret)
2384                         break;
2385                 if (!nrefs->need_check[*level - 1]) {
2386                         path->slots[*level]++;
2387                         continue;
2388                 }
2389
2390                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2391                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2392                         free_extent_buffer(next);
2393                         reada_walk_down(root, cur, path->slots[*level]);
2394                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2395                         if (!extent_buffer_uptodate(next)) {
2396                                 struct btrfs_key node_key;
2397
2398                                 btrfs_node_key_to_cpu(path->nodes[*level],
2399                                                       &node_key,
2400                                                       path->slots[*level]);
2401                                 btrfs_add_corrupt_extent_record(fs_info,
2402                                                 &node_key,
2403                                                 path->nodes[*level]->start,
2404                                                 fs_info->nodesize,
2405                                                 *level);
2406                                 ret = -EIO;
2407                                 break;
2408                         }
2409                 }
2410
2411                 ret = check_child_node(cur, path->slots[*level], next);
2412                 if (ret < 0) 
2413                         break;
2414
2415                 if (btrfs_is_leaf(next))
2416                         status = btrfs_check_leaf(root, NULL, next);
2417                 else
2418                         status = btrfs_check_node(root, NULL, next);
2419                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2420                         free_extent_buffer(next);
2421                         ret = -EIO;
2422                         break;
2423                 }
2424
2425                 *level = *level - 1;
2426                 free_extent_buffer(path->nodes[*level]);
2427                 path->nodes[*level] = next;
2428                 path->slots[*level] = 0;
2429         }
2430         return ret;
2431 }
2432
2433 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2434                         struct walk_control *wc, int *level)
2435 {
2436         int i;
2437         struct extent_buffer *leaf;
2438
2439         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2440                 leaf = path->nodes[i];
2441                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2442                         path->slots[i]++;
2443                         *level = i;
2444                         return 0;
2445                 } else {
2446                         free_extent_buffer(path->nodes[*level]);
2447                         path->nodes[*level] = NULL;
2448                         BUG_ON(*level > wc->active_node);
2449                         if (*level == wc->active_node)
2450                                 leave_shared_node(root, wc, *level);
2451                         *level = i + 1;
2452                 }
2453         }
2454         return 1;
2455 }
2456
2457 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2458                            int *level)
2459 {
2460         int i;
2461         struct extent_buffer *leaf;
2462
2463         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2464                 leaf = path->nodes[i];
2465                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2466                         path->slots[i]++;
2467                         *level = i;
2468                         return 0;
2469                 } else {
2470                         free_extent_buffer(path->nodes[*level]);
2471                         path->nodes[*level] = NULL;
2472                         *level = i + 1;
2473                 }
2474         }
2475         return 1;
2476 }
2477
2478 static int check_root_dir(struct inode_record *rec)
2479 {
2480         struct inode_backref *backref;
2481         int ret = -1;
2482
2483         if (!rec->found_inode_item || rec->errors)
2484                 goto out;
2485         if (rec->nlink != 1 || rec->found_link != 0)
2486                 goto out;
2487         if (list_empty(&rec->backrefs))
2488                 goto out;
2489         backref = to_inode_backref(rec->backrefs.next);
2490         if (!backref->found_inode_ref)
2491                 goto out;
2492         if (backref->index != 0 || backref->namelen != 2 ||
2493             memcmp(backref->name, "..", 2))
2494                 goto out;
2495         if (backref->found_dir_index || backref->found_dir_item)
2496                 goto out;
2497         ret = 0;
2498 out:
2499         return ret;
2500 }
2501
2502 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2503                               struct btrfs_root *root, struct btrfs_path *path,
2504                               struct inode_record *rec)
2505 {
2506         struct btrfs_inode_item *ei;
2507         struct btrfs_key key;
2508         int ret;
2509
2510         key.objectid = rec->ino;
2511         key.type = BTRFS_INODE_ITEM_KEY;
2512         key.offset = (u64)-1;
2513
2514         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2515         if (ret < 0)
2516                 goto out;
2517         if (ret) {
2518                 if (!path->slots[0]) {
2519                         ret = -ENOENT;
2520                         goto out;
2521                 }
2522                 path->slots[0]--;
2523                 ret = 0;
2524         }
2525         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2526         if (key.objectid != rec->ino) {
2527                 ret = -ENOENT;
2528                 goto out;
2529         }
2530
2531         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2532                             struct btrfs_inode_item);
2533         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2534         btrfs_mark_buffer_dirty(path->nodes[0]);
2535         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2536         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2537                root->root_key.objectid);
2538 out:
2539         btrfs_release_path(path);
2540         return ret;
2541 }
2542
2543 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2544                                     struct btrfs_root *root,
2545                                     struct btrfs_path *path,
2546                                     struct inode_record *rec)
2547 {
2548         int ret;
2549
2550         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2551         btrfs_release_path(path);
2552         if (!ret)
2553                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2554         return ret;
2555 }
2556
2557 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2558                                struct btrfs_root *root,
2559                                struct btrfs_path *path,
2560                                struct inode_record *rec)
2561 {
2562         struct btrfs_inode_item *ei;
2563         struct btrfs_key key;
2564         int ret = 0;
2565
2566         key.objectid = rec->ino;
2567         key.type = BTRFS_INODE_ITEM_KEY;
2568         key.offset = 0;
2569
2570         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2571         if (ret) {
2572                 if (ret > 0)
2573                         ret = -ENOENT;
2574                 goto out;
2575         }
2576
2577         /* Since ret == 0, no need to check anything */
2578         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2579                             struct btrfs_inode_item);
2580         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2581         btrfs_mark_buffer_dirty(path->nodes[0]);
2582         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2583         printf("reset nbytes for ino %llu root %llu\n",
2584                rec->ino, root->root_key.objectid);
2585 out:
2586         btrfs_release_path(path);
2587         return ret;
2588 }
2589
2590 static int add_missing_dir_index(struct btrfs_root *root,
2591                                  struct cache_tree *inode_cache,
2592                                  struct inode_record *rec,
2593                                  struct inode_backref *backref)
2594 {
2595         struct btrfs_path path;
2596         struct btrfs_trans_handle *trans;
2597         struct btrfs_dir_item *dir_item;
2598         struct extent_buffer *leaf;
2599         struct btrfs_key key;
2600         struct btrfs_disk_key disk_key;
2601         struct inode_record *dir_rec;
2602         unsigned long name_ptr;
2603         u32 data_size = sizeof(*dir_item) + backref->namelen;
2604         int ret;
2605
2606         trans = btrfs_start_transaction(root, 1);
2607         if (IS_ERR(trans))
2608                 return PTR_ERR(trans);
2609
2610         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2611                 (unsigned long long)rec->ino);
2612
2613         btrfs_init_path(&path);
2614         key.objectid = backref->dir;
2615         key.type = BTRFS_DIR_INDEX_KEY;
2616         key.offset = backref->index;
2617         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2618         BUG_ON(ret);
2619
2620         leaf = path.nodes[0];
2621         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2622
2623         disk_key.objectid = cpu_to_le64(rec->ino);
2624         disk_key.type = BTRFS_INODE_ITEM_KEY;
2625         disk_key.offset = 0;
2626
2627         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2628         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2629         btrfs_set_dir_data_len(leaf, dir_item, 0);
2630         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2631         name_ptr = (unsigned long)(dir_item + 1);
2632         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2633         btrfs_mark_buffer_dirty(leaf);
2634         btrfs_release_path(&path);
2635         btrfs_commit_transaction(trans, root);
2636
2637         backref->found_dir_index = 1;
2638         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2639         BUG_ON(IS_ERR(dir_rec));
2640         if (!dir_rec)
2641                 return 0;
2642         dir_rec->found_size += backref->namelen;
2643         if (dir_rec->found_size == dir_rec->isize &&
2644             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2645                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2646         if (dir_rec->found_size != dir_rec->isize)
2647                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2648
2649         return 0;
2650 }
2651
2652 static int delete_dir_index(struct btrfs_root *root,
2653                             struct inode_backref *backref)
2654 {
2655         struct btrfs_trans_handle *trans;
2656         struct btrfs_dir_item *di;
2657         struct btrfs_path path;
2658         int ret = 0;
2659
2660         trans = btrfs_start_transaction(root, 1);
2661         if (IS_ERR(trans))
2662                 return PTR_ERR(trans);
2663
2664         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2665                 (unsigned long long)backref->dir,
2666                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2667                 (unsigned long long)root->objectid);
2668
2669         btrfs_init_path(&path);
2670         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2671                                     backref->name, backref->namelen,
2672                                     backref->index, -1);
2673         if (IS_ERR(di)) {
2674                 ret = PTR_ERR(di);
2675                 btrfs_release_path(&path);
2676                 btrfs_commit_transaction(trans, root);
2677                 if (ret == -ENOENT)
2678                         return 0;
2679                 return ret;
2680         }
2681
2682         if (!di)
2683                 ret = btrfs_del_item(trans, root, &path);
2684         else
2685                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2686         BUG_ON(ret);
2687         btrfs_release_path(&path);
2688         btrfs_commit_transaction(trans, root);
2689         return ret;
2690 }
2691
2692 static int create_inode_item(struct btrfs_root *root,
2693                              struct inode_record *rec,
2694                              int root_dir)
2695 {
2696         struct btrfs_trans_handle *trans;
2697         struct btrfs_inode_item inode_item;
2698         time_t now = time(NULL);
2699         int ret;
2700
2701         trans = btrfs_start_transaction(root, 1);
2702         if (IS_ERR(trans)) {
2703                 ret = PTR_ERR(trans);
2704                 return ret;
2705         }
2706
2707         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2708                 "be incomplete, please check permissions and content after "
2709                 "the fsck completes.\n", (unsigned long long)root->objectid,
2710                 (unsigned long long)rec->ino);
2711
2712         memset(&inode_item, 0, sizeof(inode_item));
2713         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2714         if (root_dir)
2715                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2716         else
2717                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2718         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2719         if (rec->found_dir_item) {
2720                 if (rec->found_file_extent)
2721                         fprintf(stderr, "root %llu inode %llu has both a dir "
2722                                 "item and extents, unsure if it is a dir or a "
2723                                 "regular file so setting it as a directory\n",
2724                                 (unsigned long long)root->objectid,
2725                                 (unsigned long long)rec->ino);
2726                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2727                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2728         } else if (!rec->found_dir_item) {
2729                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2730                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2731         }
2732         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2733         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2734         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2735         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2736         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2737         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2738         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2739         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2740
2741         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2742         BUG_ON(ret);
2743         btrfs_commit_transaction(trans, root);
2744         return 0;
2745 }
2746
2747 static int repair_inode_backrefs(struct btrfs_root *root,
2748                                  struct inode_record *rec,
2749                                  struct cache_tree *inode_cache,
2750                                  int delete)
2751 {
2752         struct inode_backref *tmp, *backref;
2753         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2754         int ret = 0;
2755         int repaired = 0;
2756
2757         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2758                 if (!delete && rec->ino == root_dirid) {
2759                         if (!rec->found_inode_item) {
2760                                 ret = create_inode_item(root, rec, 1);
2761                                 if (ret)
2762                                         break;
2763                                 repaired++;
2764                         }
2765                 }
2766
2767                 /* Index 0 for root dir's are special, don't mess with it */
2768                 if (rec->ino == root_dirid && backref->index == 0)
2769                         continue;
2770
2771                 if (delete &&
2772                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2773                      (backref->found_dir_index && backref->found_inode_ref &&
2774                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2775                         ret = delete_dir_index(root, backref);
2776                         if (ret)
2777                                 break;
2778                         repaired++;
2779                         list_del(&backref->list);
2780                         free(backref);
2781                         continue;
2782                 }
2783
2784                 if (!delete && !backref->found_dir_index &&
2785                     backref->found_dir_item && backref->found_inode_ref) {
2786                         ret = add_missing_dir_index(root, inode_cache, rec,
2787                                                     backref);
2788                         if (ret)
2789                                 break;
2790                         repaired++;
2791                         if (backref->found_dir_item &&
2792                             backref->found_dir_index) {
2793                                 if (!backref->errors &&
2794                                     backref->found_inode_ref) {
2795                                         list_del(&backref->list);
2796                                         free(backref);
2797                                         continue;
2798                                 }
2799                         }
2800                 }
2801
2802                 if (!delete && (!backref->found_dir_index &&
2803                                 !backref->found_dir_item &&
2804                                 backref->found_inode_ref)) {
2805                         struct btrfs_trans_handle *trans;
2806                         struct btrfs_key location;
2807
2808                         ret = check_dir_conflict(root, backref->name,
2809                                                  backref->namelen,
2810                                                  backref->dir,
2811                                                  backref->index);
2812                         if (ret) {
2813                                 /*
2814                                  * let nlink fixing routine to handle it,
2815                                  * which can do it better.
2816                                  */
2817                                 ret = 0;
2818                                 break;
2819                         }
2820                         location.objectid = rec->ino;
2821                         location.type = BTRFS_INODE_ITEM_KEY;
2822                         location.offset = 0;
2823
2824                         trans = btrfs_start_transaction(root, 1);
2825                         if (IS_ERR(trans)) {
2826                                 ret = PTR_ERR(trans);
2827                                 break;
2828                         }
2829                         fprintf(stderr, "adding missing dir index/item pair "
2830                                 "for inode %llu\n",
2831                                 (unsigned long long)rec->ino);
2832                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2833                                                     backref->namelen,
2834                                                     backref->dir, &location,
2835                                                     imode_to_type(rec->imode),
2836                                                     backref->index);
2837                         BUG_ON(ret);
2838                         btrfs_commit_transaction(trans, root);
2839                         repaired++;
2840                 }
2841
2842                 if (!delete && (backref->found_inode_ref &&
2843                                 backref->found_dir_index &&
2844                                 backref->found_dir_item &&
2845                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2846                                 !rec->found_inode_item)) {
2847                         ret = create_inode_item(root, rec, 0);
2848                         if (ret)
2849                                 break;
2850                         repaired++;
2851                 }
2852
2853         }
2854         return ret ? ret : repaired;
2855 }
2856
2857 /*
2858  * To determine the file type for nlink/inode_item repair
2859  *
2860  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2861  * Return -ENOENT if file type is not found.
2862  */
2863 static int find_file_type(struct inode_record *rec, u8 *type)
2864 {
2865         struct inode_backref *backref;
2866
2867         /* For inode item recovered case */
2868         if (rec->found_inode_item) {
2869                 *type = imode_to_type(rec->imode);
2870                 return 0;
2871         }
2872
2873         list_for_each_entry(backref, &rec->backrefs, list) {
2874                 if (backref->found_dir_index || backref->found_dir_item) {
2875                         *type = backref->filetype;
2876                         return 0;
2877                 }
2878         }
2879         return -ENOENT;
2880 }
2881
2882 /*
2883  * To determine the file name for nlink repair
2884  *
2885  * Return 0 if file name is found, set name and namelen.
2886  * Return -ENOENT if file name is not found.
2887  */
2888 static int find_file_name(struct inode_record *rec,
2889                           char *name, int *namelen)
2890 {
2891         struct inode_backref *backref;
2892
2893         list_for_each_entry(backref, &rec->backrefs, list) {
2894                 if (backref->found_dir_index || backref->found_dir_item ||
2895                     backref->found_inode_ref) {
2896                         memcpy(name, backref->name, backref->namelen);
2897                         *namelen = backref->namelen;
2898                         return 0;
2899                 }
2900         }
2901         return -ENOENT;
2902 }
2903
2904 /* Reset the nlink of the inode to the correct one */
2905 static int reset_nlink(struct btrfs_trans_handle *trans,
2906                        struct btrfs_root *root,
2907                        struct btrfs_path *path,
2908                        struct inode_record *rec)
2909 {
2910         struct inode_backref *backref;
2911         struct inode_backref *tmp;
2912         struct btrfs_key key;
2913         struct btrfs_inode_item *inode_item;
2914         int ret = 0;
2915
2916         /* We don't believe this either, reset it and iterate backref */
2917         rec->found_link = 0;
2918
2919         /* Remove all backref including the valid ones */
2920         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2921                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2922                                    backref->index, backref->name,
2923                                    backref->namelen, 0);
2924                 if (ret < 0)
2925                         goto out;
2926
2927                 /* remove invalid backref, so it won't be added back */
2928                 if (!(backref->found_dir_index &&
2929                       backref->found_dir_item &&
2930                       backref->found_inode_ref)) {
2931                         list_del(&backref->list);
2932                         free(backref);
2933                 } else {
2934                         rec->found_link++;
2935                 }
2936         }
2937
2938         /* Set nlink to 0 */
2939         key.objectid = rec->ino;
2940         key.type = BTRFS_INODE_ITEM_KEY;
2941         key.offset = 0;
2942         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2943         if (ret < 0)
2944                 goto out;
2945         if (ret > 0) {
2946                 ret = -ENOENT;
2947                 goto out;
2948         }
2949         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2950                                     struct btrfs_inode_item);
2951         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2952         btrfs_mark_buffer_dirty(path->nodes[0]);
2953         btrfs_release_path(path);
2954
2955         /*
2956          * Add back valid inode_ref/dir_item/dir_index,
2957          * add_link() will handle the nlink inc, so new nlink must be correct
2958          */
2959         list_for_each_entry(backref, &rec->backrefs, list) {
2960                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2961                                      backref->name, backref->namelen,
2962                                      backref->filetype, &backref->index, 1);
2963                 if (ret < 0)
2964                         goto out;
2965         }
2966 out:
2967         btrfs_release_path(path);
2968         return ret;
2969 }
2970
2971 static int get_highest_inode(struct btrfs_trans_handle *trans,
2972                                 struct btrfs_root *root,
2973                                 struct btrfs_path *path,
2974                                 u64 *highest_ino)
2975 {
2976         struct btrfs_key key, found_key;
2977         int ret;
2978
2979         btrfs_init_path(path);
2980         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2981         key.offset = -1;
2982         key.type = BTRFS_INODE_ITEM_KEY;
2983         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2984         if (ret == 1) {
2985                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2986                                 path->slots[0] - 1);
2987                 *highest_ino = found_key.objectid;
2988                 ret = 0;
2989         }
2990         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2991                 ret = -EOVERFLOW;
2992         btrfs_release_path(path);
2993         return ret;
2994 }
2995
2996 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2997                                struct btrfs_root *root,
2998                                struct btrfs_path *path,
2999                                struct inode_record *rec)
3000 {
3001         char *dir_name = "lost+found";
3002         char namebuf[BTRFS_NAME_LEN] = {0};
3003         u64 lost_found_ino;
3004         u32 mode = 0700;
3005         u8 type = 0;
3006         int namelen = 0;
3007         int name_recovered = 0;
3008         int type_recovered = 0;
3009         int ret = 0;
3010
3011         /*
3012          * Get file name and type first before these invalid inode ref
3013          * are deleted by remove_all_invalid_backref()
3014          */
3015         name_recovered = !find_file_name(rec, namebuf, &namelen);
3016         type_recovered = !find_file_type(rec, &type);
3017
3018         if (!name_recovered) {
3019                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3020                        rec->ino, rec->ino);
3021                 namelen = count_digits(rec->ino);
3022                 sprintf(namebuf, "%llu", rec->ino);
3023                 name_recovered = 1;
3024         }
3025         if (!type_recovered) {
3026                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3027                        rec->ino);
3028                 type = BTRFS_FT_REG_FILE;
3029                 type_recovered = 1;
3030         }
3031
3032         ret = reset_nlink(trans, root, path, rec);
3033         if (ret < 0) {
3034                 fprintf(stderr,
3035                         "Failed to reset nlink for inode %llu: %s\n",
3036                         rec->ino, strerror(-ret));
3037                 goto out;
3038         }
3039
3040         if (rec->found_link == 0) {
3041                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3042                 if (ret < 0)
3043                         goto out;
3044                 lost_found_ino++;
3045                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3046                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3047                                   mode);
3048                 if (ret < 0) {
3049                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
3050                                 dir_name, strerror(-ret));
3051                         goto out;
3052                 }
3053                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3054                                      namebuf, namelen, type, NULL, 1);
3055                 /*
3056                  * Add ".INO" suffix several times to handle case where
3057                  * "FILENAME.INO" is already taken by another file.
3058                  */
3059                 while (ret == -EEXIST) {
3060                         /*
3061                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
3062                          */
3063                         if (namelen + count_digits(rec->ino) + 1 >
3064                             BTRFS_NAME_LEN) {
3065                                 ret = -EFBIG;
3066                                 goto out;
3067                         }
3068                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3069                                  ".%llu", rec->ino);
3070                         namelen += count_digits(rec->ino) + 1;
3071                         ret = btrfs_add_link(trans, root, rec->ino,
3072                                              lost_found_ino, namebuf,
3073                                              namelen, type, NULL, 1);
3074                 }
3075                 if (ret < 0) {
3076                         fprintf(stderr,
3077                                 "Failed to link the inode %llu to %s dir: %s\n",
3078                                 rec->ino, dir_name, strerror(-ret));
3079                         goto out;
3080                 }
3081                 /*
3082                  * Just increase the found_link, don't actually add the
3083                  * backref. This will make things easier and this inode
3084                  * record will be freed after the repair is done.
3085                  * So fsck will not report problem about this inode.
3086                  */
3087                 rec->found_link++;
3088                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3089                        namelen, namebuf, dir_name);
3090         }
3091         printf("Fixed the nlink of inode %llu\n", rec->ino);
3092 out:
3093         /*
3094          * Clear the flag anyway, or we will loop forever for the same inode
3095          * as it will not be removed from the bad inode list and the dead loop
3096          * happens.
3097          */
3098         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3099         btrfs_release_path(path);
3100         return ret;
3101 }
3102
3103 /*
3104  * Check if there is any normal(reg or prealloc) file extent for given
3105  * ino.
3106  * This is used to determine the file type when neither its dir_index/item or
3107  * inode_item exists.
3108  *
3109  * This will *NOT* report error, if any error happens, just consider it does
3110  * not have any normal file extent.
3111  */
3112 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3113 {
3114         struct btrfs_path path;
3115         struct btrfs_key key;
3116         struct btrfs_key found_key;
3117         struct btrfs_file_extent_item *fi;
3118         u8 type;
3119         int ret = 0;
3120
3121         btrfs_init_path(&path);
3122         key.objectid = ino;
3123         key.type = BTRFS_EXTENT_DATA_KEY;
3124         key.offset = 0;
3125
3126         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3127         if (ret < 0) {
3128                 ret = 0;
3129                 goto out;
3130         }
3131         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3132                 ret = btrfs_next_leaf(root, &path);
3133                 if (ret) {
3134                         ret = 0;
3135                         goto out;
3136                 }
3137         }
3138         while (1) {
3139                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3140                                       path.slots[0]);
3141                 if (found_key.objectid != ino ||
3142                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3143                         break;
3144                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3145                                     struct btrfs_file_extent_item);
3146                 type = btrfs_file_extent_type(path.nodes[0], fi);
3147                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3148                         ret = 1;
3149                         goto out;
3150                 }
3151         }
3152 out:
3153         btrfs_release_path(&path);
3154         return ret;
3155 }
3156
3157 static u32 btrfs_type_to_imode(u8 type)
3158 {
3159         static u32 imode_by_btrfs_type[] = {
3160                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3161                 [BTRFS_FT_DIR]          = S_IFDIR,
3162                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3163                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3164                 [BTRFS_FT_FIFO]         = S_IFIFO,
3165                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3166                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3167         };
3168
3169         return imode_by_btrfs_type[(type)];
3170 }
3171
3172 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3173                                 struct btrfs_root *root,
3174                                 struct btrfs_path *path,
3175                                 struct inode_record *rec)
3176 {
3177         u8 filetype;
3178         u32 mode = 0700;
3179         int type_recovered = 0;
3180         int ret = 0;
3181
3182         printf("Trying to rebuild inode:%llu\n", rec->ino);
3183
3184         type_recovered = !find_file_type(rec, &filetype);
3185
3186         /*
3187          * Try to determine inode type if type not found.
3188          *
3189          * For found regular file extent, it must be FILE.
3190          * For found dir_item/index, it must be DIR.
3191          *
3192          * For undetermined one, use FILE as fallback.
3193          *
3194          * TODO:
3195          * 1. If found backref(inode_index/item is already handled) to it,
3196          *    it must be DIR.
3197          *    Need new inode-inode ref structure to allow search for that.
3198          */
3199         if (!type_recovered) {
3200                 if (rec->found_file_extent &&
3201                     find_normal_file_extent(root, rec->ino)) {
3202                         type_recovered = 1;
3203                         filetype = BTRFS_FT_REG_FILE;
3204                 } else if (rec->found_dir_item) {
3205                         type_recovered = 1;
3206                         filetype = BTRFS_FT_DIR;
3207                 } else if (!list_empty(&rec->orphan_extents)) {
3208                         type_recovered = 1;
3209                         filetype = BTRFS_FT_REG_FILE;
3210                 } else{
3211                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3212                                rec->ino);
3213                         type_recovered = 1;
3214                         filetype = BTRFS_FT_REG_FILE;
3215                 }
3216         }
3217
3218         ret = btrfs_new_inode(trans, root, rec->ino,
3219                               mode | btrfs_type_to_imode(filetype));
3220         if (ret < 0)
3221                 goto out;
3222
3223         /*
3224          * Here inode rebuild is done, we only rebuild the inode item,
3225          * don't repair the nlink(like move to lost+found).
3226          * That is the job of nlink repair.
3227          *
3228          * We just fill the record and return
3229          */
3230         rec->found_dir_item = 1;
3231         rec->imode = mode | btrfs_type_to_imode(filetype);
3232         rec->nlink = 0;
3233         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3234         /* Ensure the inode_nlinks repair function will be called */
3235         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3236 out:
3237         return ret;
3238 }
3239
3240 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3241                                       struct btrfs_root *root,
3242                                       struct btrfs_path *path,
3243                                       struct inode_record *rec)
3244 {
3245         struct orphan_data_extent *orphan;
3246         struct orphan_data_extent *tmp;
3247         int ret = 0;
3248
3249         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3250                 /*
3251                  * Check for conflicting file extents
3252                  *
3253                  * Here we don't know whether the extents is compressed or not,
3254                  * so we can only assume it not compressed nor data offset,
3255                  * and use its disk_len as extent length.
3256                  */
3257                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3258                                        orphan->offset, orphan->disk_len, 0);
3259                 btrfs_release_path(path);
3260                 if (ret < 0)
3261                         goto out;
3262                 if (!ret) {
3263                         fprintf(stderr,
3264                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3265                                 orphan->disk_bytenr, orphan->disk_len);
3266                         ret = btrfs_free_extent(trans,
3267                                         root->fs_info->extent_root,
3268                                         orphan->disk_bytenr, orphan->disk_len,
3269                                         0, root->objectid, orphan->objectid,
3270                                         orphan->offset);
3271                         if (ret < 0)
3272                                 goto out;
3273                 }
3274                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3275                                 orphan->offset, orphan->disk_bytenr,
3276                                 orphan->disk_len, orphan->disk_len);
3277                 if (ret < 0)
3278                         goto out;
3279
3280                 /* Update file size info */
3281                 rec->found_size += orphan->disk_len;
3282                 if (rec->found_size == rec->nbytes)
3283                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3284
3285                 /* Update the file extent hole info too */
3286                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3287                                            orphan->disk_len);
3288                 if (ret < 0)
3289                         goto out;
3290                 if (RB_EMPTY_ROOT(&rec->holes))
3291                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3292
3293                 list_del(&orphan->list);
3294                 free(orphan);
3295         }
3296         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3297 out:
3298         return ret;
3299 }
3300
3301 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3302                                         struct btrfs_root *root,
3303                                         struct btrfs_path *path,
3304                                         struct inode_record *rec)
3305 {
3306         struct rb_node *node;
3307         struct file_extent_hole *hole;
3308         int found = 0;
3309         int ret = 0;
3310
3311         node = rb_first(&rec->holes);
3312
3313         while (node) {
3314                 found = 1;
3315                 hole = rb_entry(node, struct file_extent_hole, node);
3316                 ret = btrfs_punch_hole(trans, root, rec->ino,
3317                                        hole->start, hole->len);
3318                 if (ret < 0)
3319                         goto out;
3320                 ret = del_file_extent_hole(&rec->holes, hole->start,
3321                                            hole->len);
3322                 if (ret < 0)
3323                         goto out;
3324                 if (RB_EMPTY_ROOT(&rec->holes))
3325                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3326                 node = rb_first(&rec->holes);
3327         }
3328         /* special case for a file losing all its file extent */
3329         if (!found) {
3330                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3331                                        round_up(rec->isize,
3332                                                 root->fs_info->sectorsize));
3333                 if (ret < 0)
3334                         goto out;
3335         }
3336         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3337                rec->ino, root->objectid);
3338 out:
3339         return ret;
3340 }
3341
3342 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3343 {
3344         struct btrfs_trans_handle *trans;
3345         struct btrfs_path path;
3346         int ret = 0;
3347
3348         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3349                              I_ERR_NO_ORPHAN_ITEM |
3350                              I_ERR_LINK_COUNT_WRONG |
3351                              I_ERR_NO_INODE_ITEM |
3352                              I_ERR_FILE_EXTENT_ORPHAN |
3353                              I_ERR_FILE_EXTENT_DISCOUNT|
3354                              I_ERR_FILE_NBYTES_WRONG)))
3355                 return rec->errors;
3356
3357         /*
3358          * For nlink repair, it may create a dir and add link, so
3359          * 2 for parent(256)'s dir_index and dir_item
3360          * 2 for lost+found dir's inode_item and inode_ref
3361          * 1 for the new inode_ref of the file
3362          * 2 for lost+found dir's dir_index and dir_item for the file
3363          */
3364         trans = btrfs_start_transaction(root, 7);
3365         if (IS_ERR(trans))
3366                 return PTR_ERR(trans);
3367
3368         btrfs_init_path(&path);
3369         if (rec->errors & I_ERR_NO_INODE_ITEM)
3370                 ret = repair_inode_no_item(trans, root, &path, rec);
3371         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3372                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3373         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3374                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3375         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3376                 ret = repair_inode_isize(trans, root, &path, rec);
3377         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3378                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3379         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3380                 ret = repair_inode_nlinks(trans, root, &path, rec);
3381         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3382                 ret = repair_inode_nbytes(trans, root, &path, rec);
3383         btrfs_commit_transaction(trans, root);
3384         btrfs_release_path(&path);
3385         return ret;
3386 }
3387
3388 static int check_inode_recs(struct btrfs_root *root,
3389                             struct cache_tree *inode_cache)
3390 {
3391         struct cache_extent *cache;
3392         struct ptr_node *node;
3393         struct inode_record *rec;
3394         struct inode_backref *backref;
3395         int stage = 0;
3396         int ret = 0;
3397         int err = 0;
3398         u64 error = 0;
3399         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3400
3401         if (btrfs_root_refs(&root->root_item) == 0) {
3402                 if (!cache_tree_empty(inode_cache))
3403                         fprintf(stderr, "warning line %d\n", __LINE__);
3404                 return 0;
3405         }
3406
3407         /*
3408          * We need to repair backrefs first because we could change some of the
3409          * errors in the inode recs.
3410          *
3411          * We also need to go through and delete invalid backrefs first and then
3412          * add the correct ones second.  We do this because we may get EEXIST
3413          * when adding back the correct index because we hadn't yet deleted the
3414          * invalid index.
3415          *
3416          * For example, if we were missing a dir index then the directories
3417          * isize would be wrong, so if we fixed the isize to what we thought it
3418          * would be and then fixed the backref we'd still have a invalid fs, so
3419          * we need to add back the dir index and then check to see if the isize
3420          * is still wrong.
3421          */
3422         while (stage < 3) {
3423                 stage++;
3424                 if (stage == 3 && !err)
3425                         break;
3426
3427                 cache = search_cache_extent(inode_cache, 0);
3428                 while (repair && cache) {
3429                         node = container_of(cache, struct ptr_node, cache);
3430                         rec = node->data;
3431                         cache = next_cache_extent(cache);
3432
3433                         /* Need to free everything up and rescan */
3434                         if (stage == 3) {
3435                                 remove_cache_extent(inode_cache, &node->cache);
3436                                 free(node);
3437                                 free_inode_rec(rec);
3438                                 continue;
3439                         }
3440
3441                         if (list_empty(&rec->backrefs))
3442                                 continue;
3443
3444                         ret = repair_inode_backrefs(root, rec, inode_cache,
3445                                                     stage == 1);
3446                         if (ret < 0) {
3447                                 err = ret;
3448                                 stage = 2;
3449                                 break;
3450                         } if (ret > 0) {
3451                                 err = -EAGAIN;
3452                         }
3453                 }
3454         }
3455         if (err)
3456                 return err;
3457
3458         rec = get_inode_rec(inode_cache, root_dirid, 0);
3459         BUG_ON(IS_ERR(rec));
3460         if (rec) {
3461                 ret = check_root_dir(rec);
3462                 if (ret) {
3463                         fprintf(stderr, "root %llu root dir %llu error\n",
3464                                 (unsigned long long)root->root_key.objectid,
3465                                 (unsigned long long)root_dirid);
3466                         print_inode_error(root, rec);
3467                         error++;
3468                 }
3469         } else {
3470                 if (repair) {
3471                         struct btrfs_trans_handle *trans;
3472
3473                         trans = btrfs_start_transaction(root, 1);
3474                         if (IS_ERR(trans)) {
3475                                 err = PTR_ERR(trans);
3476                                 return err;
3477                         }
3478
3479                         fprintf(stderr,
3480                                 "root %llu missing its root dir, recreating\n",
3481                                 (unsigned long long)root->objectid);
3482
3483                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3484                         BUG_ON(ret);
3485
3486                         btrfs_commit_transaction(trans, root);
3487                         return -EAGAIN;
3488                 }
3489
3490                 fprintf(stderr, "root %llu root dir %llu not found\n",
3491                         (unsigned long long)root->root_key.objectid,
3492                         (unsigned long long)root_dirid);
3493         }
3494
3495         while (1) {
3496                 cache = search_cache_extent(inode_cache, 0);
3497                 if (!cache)
3498                         break;
3499                 node = container_of(cache, struct ptr_node, cache);
3500                 rec = node->data;
3501                 remove_cache_extent(inode_cache, &node->cache);
3502                 free(node);
3503                 if (rec->ino == root_dirid ||
3504                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3505                         free_inode_rec(rec);
3506                         continue;
3507                 }
3508
3509                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3510                         ret = check_orphan_item(root, rec->ino);
3511                         if (ret == 0)
3512                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3513                         if (can_free_inode_rec(rec)) {
3514                                 free_inode_rec(rec);
3515                                 continue;
3516                         }
3517                 }
3518
3519                 if (!rec->found_inode_item)
3520                         rec->errors |= I_ERR_NO_INODE_ITEM;
3521                 if (rec->found_link != rec->nlink)
3522                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3523                 if (repair) {
3524                         ret = try_repair_inode(root, rec);
3525                         if (ret == 0 && can_free_inode_rec(rec)) {
3526                                 free_inode_rec(rec);
3527                                 continue;
3528                         }
3529                         ret = 0;
3530                 }
3531
3532                 if (!(repair && ret == 0))
3533                         error++;
3534                 print_inode_error(root, rec);
3535                 list_for_each_entry(backref, &rec->backrefs, list) {
3536                         if (!backref->found_dir_item)
3537                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3538                         if (!backref->found_dir_index)
3539                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3540                         if (!backref->found_inode_ref)
3541                                 backref->errors |= REF_ERR_NO_INODE_REF;
3542                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3543                                 " namelen %u name %s filetype %d errors %x",
3544                                 (unsigned long long)backref->dir,
3545                                 (unsigned long long)backref->index,
3546                                 backref->namelen, backref->name,
3547                                 backref->filetype, backref->errors);
3548                         print_ref_error(backref->errors);
3549                 }
3550                 free_inode_rec(rec);
3551         }
3552         return (error > 0) ? -1 : 0;
3553 }
3554
3555 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3556                                         u64 objectid)
3557 {
3558         struct cache_extent *cache;
3559         struct root_record *rec = NULL;
3560         int ret;
3561
3562         cache = lookup_cache_extent(root_cache, objectid, 1);
3563         if (cache) {
3564                 rec = container_of(cache, struct root_record, cache);
3565         } else {
3566                 rec = calloc(1, sizeof(*rec));
3567                 if (!rec)
3568                         return ERR_PTR(-ENOMEM);
3569                 rec->objectid = objectid;
3570                 INIT_LIST_HEAD(&rec->backrefs);
3571                 rec->cache.start = objectid;
3572                 rec->cache.size = 1;
3573
3574                 ret = insert_cache_extent(root_cache, &rec->cache);
3575                 if (ret)
3576                         return ERR_PTR(-EEXIST);
3577         }
3578         return rec;
3579 }
3580
3581 static struct root_backref *get_root_backref(struct root_record *rec,
3582                                              u64 ref_root, u64 dir, u64 index,
3583                                              const char *name, int namelen)
3584 {
3585         struct root_backref *backref;
3586
3587         list_for_each_entry(backref, &rec->backrefs, list) {
3588                 if (backref->ref_root != ref_root || backref->dir != dir ||
3589                     backref->namelen != namelen)
3590                         continue;
3591                 if (memcmp(name, backref->name, namelen))
3592                         continue;
3593                 return backref;
3594         }
3595
3596         backref = calloc(1, sizeof(*backref) + namelen + 1);
3597         if (!backref)
3598                 return NULL;
3599         backref->ref_root = ref_root;
3600         backref->dir = dir;
3601         backref->index = index;
3602         backref->namelen = namelen;
3603         memcpy(backref->name, name, namelen);
3604         backref->name[namelen] = '\0';
3605         list_add_tail(&backref->list, &rec->backrefs);
3606         return backref;
3607 }
3608
3609 static void free_root_record(struct cache_extent *cache)
3610 {
3611         struct root_record *rec;
3612         struct root_backref *backref;
3613
3614         rec = container_of(cache, struct root_record, cache);
3615         while (!list_empty(&rec->backrefs)) {
3616                 backref = to_root_backref(rec->backrefs.next);
3617                 list_del(&backref->list);
3618                 free(backref);
3619         }
3620
3621         free(rec);
3622 }
3623
3624 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3625
3626 static int add_root_backref(struct cache_tree *root_cache,
3627                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3628                             const char *name, int namelen,
3629                             int item_type, int errors)
3630 {
3631         struct root_record *rec;
3632         struct root_backref *backref;
3633
3634         rec = get_root_rec(root_cache, root_id);
3635         BUG_ON(IS_ERR(rec));
3636         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3637         BUG_ON(!backref);
3638
3639         backref->errors |= errors;
3640
3641         if (item_type != BTRFS_DIR_ITEM_KEY) {
3642                 if (backref->found_dir_index || backref->found_back_ref ||
3643                     backref->found_forward_ref) {
3644                         if (backref->index != index)
3645                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3646                 } else {
3647                         backref->index = index;
3648                 }
3649         }
3650
3651         if (item_type == BTRFS_DIR_ITEM_KEY) {
3652                 if (backref->found_forward_ref)
3653                         rec->found_ref++;
3654                 backref->found_dir_item = 1;
3655         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3656                 backref->found_dir_index = 1;
3657         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3658                 if (backref->found_forward_ref)
3659                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3660                 else if (backref->found_dir_item)
3661                         rec->found_ref++;
3662                 backref->found_forward_ref = 1;
3663         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3664                 if (backref->found_back_ref)
3665                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3666                 backref->found_back_ref = 1;
3667         } else {
3668                 BUG_ON(1);
3669         }
3670
3671         if (backref->found_forward_ref && backref->found_dir_item)
3672                 backref->reachable = 1;
3673         return 0;
3674 }
3675
3676 static int merge_root_recs(struct btrfs_root *root,
3677                            struct cache_tree *src_cache,
3678                            struct cache_tree *dst_cache)
3679 {
3680         struct cache_extent *cache;
3681         struct ptr_node *node;
3682         struct inode_record *rec;
3683         struct inode_backref *backref;
3684         int ret = 0;
3685
3686         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3687                 free_inode_recs_tree(src_cache);
3688                 return 0;
3689         }
3690
3691         while (1) {
3692                 cache = search_cache_extent(src_cache, 0);
3693                 if (!cache)
3694                         break;
3695                 node = container_of(cache, struct ptr_node, cache);
3696                 rec = node->data;
3697                 remove_cache_extent(src_cache, &node->cache);
3698                 free(node);
3699
3700                 ret = is_child_root(root, root->objectid, rec->ino);
3701                 if (ret < 0)
3702                         break;
3703                 else if (ret == 0)
3704                         goto skip;
3705
3706                 list_for_each_entry(backref, &rec->backrefs, list) {
3707                         BUG_ON(backref->found_inode_ref);
3708                         if (backref->found_dir_item)
3709                                 add_root_backref(dst_cache, rec->ino,
3710                                         root->root_key.objectid, backref->dir,
3711                                         backref->index, backref->name,
3712                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3713                                         backref->errors);
3714                         if (backref->found_dir_index)
3715                                 add_root_backref(dst_cache, rec->ino,
3716                                         root->root_key.objectid, backref->dir,
3717                                         backref->index, backref->name,
3718                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3719                                         backref->errors);
3720                 }
3721 skip:
3722                 free_inode_rec(rec);
3723         }
3724         if (ret < 0)
3725                 return ret;
3726         return 0;
3727 }
3728
3729 static int check_root_refs(struct btrfs_root *root,
3730                            struct cache_tree *root_cache)
3731 {
3732         struct root_record *rec;
3733         struct root_record *ref_root;
3734         struct root_backref *backref;
3735         struct cache_extent *cache;
3736         int loop = 1;
3737         int ret;
3738         int error;
3739         int errors = 0;
3740
3741         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3742         BUG_ON(IS_ERR(rec));
3743         rec->found_ref = 1;
3744
3745         /* fixme: this can not detect circular references */
3746         while (loop) {
3747                 loop = 0;
3748                 cache = search_cache_extent(root_cache, 0);
3749                 while (1) {
3750                         if (!cache)
3751                                 break;
3752                         rec = container_of(cache, struct root_record, cache);
3753                         cache = next_cache_extent(cache);
3754
3755                         if (rec->found_ref == 0)
3756                                 continue;
3757
3758                         list_for_each_entry(backref, &rec->backrefs, list) {
3759                                 if (!backref->reachable)
3760                                         continue;
3761
3762                                 ref_root = get_root_rec(root_cache,
3763                                                         backref->ref_root);
3764                                 BUG_ON(IS_ERR(ref_root));
3765                                 if (ref_root->found_ref > 0)
3766                                         continue;
3767
3768                                 backref->reachable = 0;
3769                                 rec->found_ref--;
3770                                 if (rec->found_ref == 0)
3771                                         loop = 1;
3772                         }
3773                 }
3774         }
3775
3776         cache = search_cache_extent(root_cache, 0);
3777         while (1) {
3778                 if (!cache)
3779                         break;
3780                 rec = container_of(cache, struct root_record, cache);
3781                 cache = next_cache_extent(cache);
3782
3783                 if (rec->found_ref == 0 &&
3784                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3785                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3786                         ret = check_orphan_item(root->fs_info->tree_root,
3787                                                 rec->objectid);
3788                         if (ret == 0)
3789                                 continue;
3790
3791                         /*
3792                          * If we don't have a root item then we likely just have
3793                          * a dir item in a snapshot for this root but no actual
3794                          * ref key or anything so it's meaningless.
3795                          */
3796                         if (!rec->found_root_item)
3797                                 continue;
3798                         errors++;
3799                         fprintf(stderr, "fs tree %llu not referenced\n",
3800                                 (unsigned long long)rec->objectid);
3801                 }
3802
3803                 error = 0;
3804                 if (rec->found_ref > 0 && !rec->found_root_item)
3805                         error = 1;
3806                 list_for_each_entry(backref, &rec->backrefs, list) {
3807                         if (!backref->found_dir_item)
3808                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3809                         if (!backref->found_dir_index)
3810                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3811                         if (!backref->found_back_ref)
3812                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3813                         if (!backref->found_forward_ref)
3814                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3815                         if (backref->reachable && backref->errors)
3816                                 error = 1;
3817                 }
3818                 if (!error)
3819                         continue;
3820
3821                 errors++;
3822                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3823                         (unsigned long long)rec->objectid, rec->found_ref,
3824                          rec->found_root_item ? "" : "not found");
3825
3826                 list_for_each_entry(backref, &rec->backrefs, list) {
3827                         if (!backref->reachable)
3828                                 continue;
3829                         if (!backref->errors && rec->found_root_item)
3830                                 continue;
3831                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3832                                 " index %llu namelen %u name %s errors %x\n",
3833                                 (unsigned long long)backref->ref_root,
3834                                 (unsigned long long)backref->dir,
3835                                 (unsigned long long)backref->index,
3836                                 backref->namelen, backref->name,
3837                                 backref->errors);
3838                         print_ref_error(backref->errors);
3839                 }
3840         }
3841         return errors > 0 ? 1 : 0;
3842 }
3843
3844 static int process_root_ref(struct extent_buffer *eb, int slot,
3845                             struct btrfs_key *key,
3846                             struct cache_tree *root_cache)
3847 {
3848         u64 dirid;
3849         u64 index;
3850         u32 len;
3851         u32 name_len;
3852         struct btrfs_root_ref *ref;
3853         char namebuf[BTRFS_NAME_LEN];
3854         int error;
3855
3856         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3857
3858         dirid = btrfs_root_ref_dirid(eb, ref);
3859         index = btrfs_root_ref_sequence(eb, ref);
3860         name_len = btrfs_root_ref_name_len(eb, ref);
3861
3862         if (name_len <= BTRFS_NAME_LEN) {
3863                 len = name_len;
3864                 error = 0;
3865         } else {
3866                 len = BTRFS_NAME_LEN;
3867                 error = REF_ERR_NAME_TOO_LONG;
3868         }
3869         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3870
3871         if (key->type == BTRFS_ROOT_REF_KEY) {
3872                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3873                                  index, namebuf, len, key->type, error);
3874         } else {
3875                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3876                                  index, namebuf, len, key->type, error);
3877         }
3878         return 0;
3879 }
3880
3881 static void free_corrupt_block(struct cache_extent *cache)
3882 {
3883         struct btrfs_corrupt_block *corrupt;
3884
3885         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3886         free(corrupt);
3887 }
3888
3889 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3890
3891 /*
3892  * Repair the btree of the given root.
3893  *
3894  * The fix is to remove the node key in corrupt_blocks cache_tree.
3895  * and rebalance the tree.
3896  * After the fix, the btree should be writeable.
3897  */
3898 static int repair_btree(struct btrfs_root *root,
3899                         struct cache_tree *corrupt_blocks)
3900 {
3901         struct btrfs_trans_handle *trans;
3902         struct btrfs_path path;
3903         struct btrfs_corrupt_block *corrupt;
3904         struct cache_extent *cache;
3905         struct btrfs_key key;
3906         u64 offset;
3907         int level;
3908         int ret = 0;
3909
3910         if (cache_tree_empty(corrupt_blocks))
3911                 return 0;
3912
3913         trans = btrfs_start_transaction(root, 1);
3914         if (IS_ERR(trans)) {
3915                 ret = PTR_ERR(trans);
3916                 fprintf(stderr, "Error starting transaction: %s\n",
3917                         strerror(-ret));
3918                 return ret;
3919         }
3920         btrfs_init_path(&path);
3921         cache = first_cache_extent(corrupt_blocks);
3922         while (cache) {
3923                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3924                                        cache);
3925                 level = corrupt->level;
3926                 path.lowest_level = level;
3927                 key.objectid = corrupt->key.objectid;
3928                 key.type = corrupt->key.type;
3929                 key.offset = corrupt->key.offset;
3930
3931                 /*
3932                  * Here we don't want to do any tree balance, since it may
3933                  * cause a balance with corrupted brother leaf/node,
3934                  * so ins_len set to 0 here.
3935                  * Balance will be done after all corrupt node/leaf is deleted.
3936                  */
3937                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3938                 if (ret < 0)
3939                         goto out;
3940                 offset = btrfs_node_blockptr(path.nodes[level],
3941                                              path.slots[level]);
3942
3943                 /* Remove the ptr */
3944                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3945                 if (ret < 0)
3946                         goto out;
3947                 /*
3948                  * Remove the corresponding extent
3949                  * return value is not concerned.
3950                  */
3951                 btrfs_release_path(&path);
3952                 ret = btrfs_free_extent(trans, root, offset,
3953                                 root->fs_info->nodesize, 0,
3954                                 root->root_key.objectid, level - 1, 0);
3955                 cache = next_cache_extent(cache);
3956         }
3957
3958         /* Balance the btree using btrfs_search_slot() */
3959         cache = first_cache_extent(corrupt_blocks);
3960         while (cache) {
3961                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3962                                        cache);
3963                 memcpy(&key, &corrupt->key, sizeof(key));
3964                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3965                 if (ret < 0)
3966                         goto out;
3967                 /* return will always >0 since it won't find the item */
3968                 ret = 0;
3969                 btrfs_release_path(&path);
3970                 cache = next_cache_extent(cache);
3971         }
3972 out:
3973         btrfs_commit_transaction(trans, root);
3974         btrfs_release_path(&path);
3975         return ret;
3976 }
3977
3978 static int check_fs_root(struct btrfs_root *root,
3979                          struct cache_tree *root_cache,
3980                          struct walk_control *wc)
3981 {
3982         int ret = 0;
3983         int err = 0;
3984         int wret;
3985         int level;
3986         struct btrfs_path path;
3987         struct shared_node root_node;
3988         struct root_record *rec;
3989         struct btrfs_root_item *root_item = &root->root_item;
3990         struct cache_tree corrupt_blocks;
3991         struct orphan_data_extent *orphan;
3992         struct orphan_data_extent *tmp;
3993         enum btrfs_tree_block_status status;
3994         struct node_refs nrefs;
3995
3996         /*
3997          * Reuse the corrupt_block cache tree to record corrupted tree block
3998          *
3999          * Unlike the usage in extent tree check, here we do it in a per
4000          * fs/subvol tree base.
4001          */
4002         cache_tree_init(&corrupt_blocks);
4003         root->fs_info->corrupt_blocks = &corrupt_blocks;
4004
4005         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4006                 rec = get_root_rec(root_cache, root->root_key.objectid);
4007                 BUG_ON(IS_ERR(rec));
4008                 if (btrfs_root_refs(root_item) > 0)
4009                         rec->found_root_item = 1;
4010         }
4011
4012         btrfs_init_path(&path);
4013         memset(&root_node, 0, sizeof(root_node));
4014         cache_tree_init(&root_node.root_cache);
4015         cache_tree_init(&root_node.inode_cache);
4016         memset(&nrefs, 0, sizeof(nrefs));
4017
4018         /* Move the orphan extent record to corresponding inode_record */
4019         list_for_each_entry_safe(orphan, tmp,
4020                                  &root->orphan_data_extents, list) {
4021                 struct inode_record *inode;
4022
4023                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4024                                       1);
4025                 BUG_ON(IS_ERR(inode));
4026                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4027                 list_move(&orphan->list, &inode->orphan_extents);
4028         }
4029
4030         level = btrfs_header_level(root->node);
4031         memset(wc->nodes, 0, sizeof(wc->nodes));
4032         wc->nodes[level] = &root_node;
4033         wc->active_node = level;
4034         wc->root_level = level;
4035
4036         /* We may not have checked the root block, lets do that now */
4037         if (btrfs_is_leaf(root->node))
4038                 status = btrfs_check_leaf(root, NULL, root->node);
4039         else
4040                 status = btrfs_check_node(root, NULL, root->node);
4041         if (status != BTRFS_TREE_BLOCK_CLEAN)
4042                 return -EIO;
4043
4044         if (btrfs_root_refs(root_item) > 0 ||
4045             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4046                 path.nodes[level] = root->node;
4047                 extent_buffer_get(root->node);
4048                 path.slots[level] = 0;
4049         } else {
4050                 struct btrfs_key key;
4051                 struct btrfs_disk_key found_key;
4052
4053                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4054                 level = root_item->drop_level;
4055                 path.lowest_level = level;
4056                 if (level > btrfs_header_level(root->node) ||
4057                     level >= BTRFS_MAX_LEVEL) {
4058                         error("ignoring invalid drop level: %u", level);
4059                         goto skip_walking;
4060                 }
4061                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4062                 if (wret < 0)
4063                         goto skip_walking;
4064                 btrfs_node_key(path.nodes[level], &found_key,
4065                                 path.slots[level]);
4066                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4067                                         sizeof(found_key)));
4068         }
4069
4070         while (1) {
4071                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4072                 if (wret < 0)
4073                         ret = wret;
4074                 if (wret != 0)
4075                         break;
4076
4077                 wret = walk_up_tree(root, &path, wc, &level);
4078                 if (wret < 0)
4079                         ret = wret;
4080                 if (wret != 0)
4081                         break;
4082         }
4083 skip_walking:
4084         btrfs_release_path(&path);
4085
4086         if (!cache_tree_empty(&corrupt_blocks)) {
4087                 struct cache_extent *cache;
4088                 struct btrfs_corrupt_block *corrupt;
4089
4090                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4091                        root->root_key.objectid);
4092                 cache = first_cache_extent(&corrupt_blocks);
4093                 while (cache) {
4094                         corrupt = container_of(cache,
4095                                                struct btrfs_corrupt_block,
4096                                                cache);
4097                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4098                                cache->start, corrupt->level,
4099                                corrupt->key.objectid, corrupt->key.type,
4100                                corrupt->key.offset);
4101                         cache = next_cache_extent(cache);
4102                 }
4103                 if (repair) {
4104                         printf("Try to repair the btree for root %llu\n",
4105                                root->root_key.objectid);
4106                         ret = repair_btree(root, &corrupt_blocks);
4107                         if (ret < 0)
4108                                 fprintf(stderr, "Failed to repair btree: %s\n",
4109                                         strerror(-ret));
4110                         if (!ret)
4111                                 printf("Btree for root %llu is fixed\n",
4112                                        root->root_key.objectid);
4113                 }
4114         }
4115
4116         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4117         if (err < 0)
4118                 ret = err;
4119
4120         if (root_node.current) {
4121                 root_node.current->checked = 1;
4122                 maybe_free_inode_rec(&root_node.inode_cache,
4123                                 root_node.current);
4124         }
4125
4126         err = check_inode_recs(root, &root_node.inode_cache);
4127         if (!ret)
4128                 ret = err;
4129
4130         free_corrupt_blocks_tree(&corrupt_blocks);
4131         root->fs_info->corrupt_blocks = NULL;
4132         free_orphan_data_extents(&root->orphan_data_extents);
4133         return ret;
4134 }
4135
4136 static int fs_root_objectid(u64 objectid)
4137 {
4138         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4139             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4140                 return 1;
4141         return is_fstree(objectid);
4142 }
4143
4144 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4145                           struct cache_tree *root_cache)
4146 {
4147         struct btrfs_path path;
4148         struct btrfs_key key;
4149         struct walk_control wc;
4150         struct extent_buffer *leaf, *tree_node;
4151         struct btrfs_root *tmp_root;
4152         struct btrfs_root *tree_root = fs_info->tree_root;
4153         int ret;
4154         int err = 0;
4155
4156         if (ctx.progress_enabled) {
4157                 ctx.tp = TASK_FS_ROOTS;
4158                 task_start(ctx.info);
4159         }
4160
4161         /*
4162          * Just in case we made any changes to the extent tree that weren't
4163          * reflected into the free space cache yet.
4164          */
4165         if (repair)
4166                 reset_cached_block_groups(fs_info);
4167         memset(&wc, 0, sizeof(wc));
4168         cache_tree_init(&wc.shared);
4169         btrfs_init_path(&path);
4170
4171 again:
4172         key.offset = 0;
4173         key.objectid = 0;
4174         key.type = BTRFS_ROOT_ITEM_KEY;
4175         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4176         if (ret < 0) {
4177                 err = 1;
4178                 goto out;
4179         }
4180         tree_node = tree_root->node;
4181         while (1) {
4182                 if (tree_node != tree_root->node) {
4183                         free_root_recs_tree(root_cache);
4184                         btrfs_release_path(&path);
4185                         goto again;
4186                 }
4187                 leaf = path.nodes[0];
4188                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4189                         ret = btrfs_next_leaf(tree_root, &path);
4190                         if (ret) {
4191                                 if (ret < 0)
4192                                         err = 1;
4193                                 break;
4194                         }
4195                         leaf = path.nodes[0];
4196                 }
4197                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4198                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4199                     fs_root_objectid(key.objectid)) {
4200                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4201                                 tmp_root = btrfs_read_fs_root_no_cache(
4202                                                 fs_info, &key);
4203                         } else {
4204                                 key.offset = (u64)-1;
4205                                 tmp_root = btrfs_read_fs_root(
4206                                                 fs_info, &key);
4207                         }
4208                         if (IS_ERR(tmp_root)) {
4209                                 err = 1;
4210                                 goto next;
4211                         }
4212                         ret = check_fs_root(tmp_root, root_cache, &wc);
4213                         if (ret == -EAGAIN) {
4214                                 free_root_recs_tree(root_cache);
4215                                 btrfs_release_path(&path);
4216                                 goto again;
4217                         }
4218                         if (ret)
4219                                 err = 1;
4220                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4221                                 btrfs_free_fs_root(tmp_root);
4222                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4223                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4224                         process_root_ref(leaf, path.slots[0], &key,
4225                                          root_cache);
4226                 }
4227 next:
4228                 path.slots[0]++;
4229         }
4230 out:
4231         btrfs_release_path(&path);
4232         if (err)
4233                 free_extent_cache_tree(&wc.shared);
4234         if (!cache_tree_empty(&wc.shared))
4235                 fprintf(stderr, "warning line %d\n", __LINE__);
4236
4237         task_stop(ctx.info);
4238
4239         return err;
4240 }
4241
4242 /*
4243  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4244  * INODE_REF/INODE_EXTREF match.
4245  *
4246  * @root:       the root of the fs/file tree
4247  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4248  * @key:        the key of the DIR_ITEM/DIR_INDEX
4249  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4250  *              distinguish root_dir between normal dir/file
4251  * @name:       the name in the INODE_REF/INODE_EXTREF
4252  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4253  * @mode:       the st_mode of INODE_ITEM
4254  *
4255  * Return 0 if no error occurred.
4256  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4257  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4258  * dir/file.
4259  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4260  * not match for normal dir/file.
4261  */
4262 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4263                          struct btrfs_key *key, u64 index, char *name,
4264                          u32 namelen, u32 mode)
4265 {
4266         struct btrfs_path path;
4267         struct extent_buffer *node;
4268         struct btrfs_dir_item *di;
4269         struct btrfs_key location;
4270         char namebuf[BTRFS_NAME_LEN] = {0};
4271         u32 total;
4272         u32 cur = 0;
4273         u32 len;
4274         u32 name_len;
4275         u32 data_len;
4276         u8 filetype;
4277         int slot;
4278         int ret;
4279
4280         btrfs_init_path(&path);
4281         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4282         if (ret < 0) {
4283                 ret = DIR_ITEM_MISSING;
4284                 goto out;
4285         }
4286
4287         /* Process root dir and goto out*/
4288         if (index == 0) {
4289                 if (ret == 0) {
4290                         ret = ROOT_DIR_ERROR;
4291                         error(
4292                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4293                                 root->objectid,
4294                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4295                                         "REF" : "EXTREF",
4296                                 ref_key->objectid, ref_key->offset,
4297                                 key->type == BTRFS_DIR_ITEM_KEY ?
4298                                         "DIR_ITEM" : "DIR_INDEX");
4299                 } else {
4300                         ret = 0;
4301                 }
4302
4303                 goto out;
4304         }
4305
4306         /* Process normal file/dir */
4307         if (ret > 0) {
4308                 ret = DIR_ITEM_MISSING;
4309                 error(
4310                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4311                         root->objectid,
4312                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4313                         ref_key->objectid, ref_key->offset,
4314                         key->type == BTRFS_DIR_ITEM_KEY ?
4315                                 "DIR_ITEM" : "DIR_INDEX",
4316                         key->objectid, key->offset, namelen, name,
4317                         imode_to_type(mode));
4318                 goto out;
4319         }
4320
4321         /* Check whether inode_id/filetype/name match */
4322         node = path.nodes[0];
4323         slot = path.slots[0];
4324         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4325         total = btrfs_item_size_nr(node, slot);
4326         while (cur < total) {
4327                 ret = DIR_ITEM_MISMATCH;
4328                 name_len = btrfs_dir_name_len(node, di);
4329                 data_len = btrfs_dir_data_len(node, di);
4330
4331                 btrfs_dir_item_key_to_cpu(node, di, &location);
4332                 if (location.objectid != ref_key->objectid ||
4333                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4334                     location.offset != 0)
4335                         goto next;
4336
4337                 filetype = btrfs_dir_type(node, di);
4338                 if (imode_to_type(mode) != filetype)
4339                         goto next;
4340
4341                 if (cur + sizeof(*di) + name_len > total ||
4342                     name_len > BTRFS_NAME_LEN) {
4343                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4344                                 root->objectid,
4345                                 key->type == BTRFS_DIR_ITEM_KEY ?
4346                                 "DIR_ITEM" : "DIR_INDEX",
4347                                 key->objectid, key->offset, name_len);
4348
4349                         if (cur + sizeof(*di) > total)
4350                                 break;
4351                         len = min_t(u32, total - cur - sizeof(*di),
4352                                     BTRFS_NAME_LEN);
4353                 } else {
4354                         len = name_len;
4355                 }
4356
4357                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4358                 if (len != namelen || strncmp(namebuf, name, len))
4359                         goto next;
4360
4361                 ret = 0;
4362                 goto out;
4363 next:
4364                 len = sizeof(*di) + name_len + data_len;
4365                 di = (struct btrfs_dir_item *)((char *)di + len);
4366                 cur += len;
4367         }
4368         if (ret == DIR_ITEM_MISMATCH)
4369                 error(
4370                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4371                         root->objectid,
4372                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4373                         ref_key->objectid, ref_key->offset,
4374                         key->type == BTRFS_DIR_ITEM_KEY ?
4375                                 "DIR_ITEM" : "DIR_INDEX",
4376                         key->objectid, key->offset, namelen, name,
4377                         imode_to_type(mode));
4378 out:
4379         btrfs_release_path(&path);
4380         return ret;
4381 }
4382
4383 /*
4384  * Traverse the given INODE_REF and call find_dir_item() to find related
4385  * DIR_ITEM/DIR_INDEX.
4386  *
4387  * @root:       the root of the fs/file tree
4388  * @ref_key:    the key of the INODE_REF
4389  * @refs:       the count of INODE_REF
4390  * @mode:       the st_mode of INODE_ITEM
4391  *
4392  * Return 0 if no error occurred.
4393  */
4394 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4395                            struct extent_buffer *node, int slot, u64 *refs,
4396                            int mode)
4397 {
4398         struct btrfs_key key;
4399         struct btrfs_inode_ref *ref;
4400         char namebuf[BTRFS_NAME_LEN] = {0};
4401         u32 total;
4402         u32 cur = 0;
4403         u32 len;
4404         u32 name_len;
4405         u64 index;
4406         int ret, err = 0;
4407
4408         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4409         total = btrfs_item_size_nr(node, slot);
4410
4411 next:
4412         /* Update inode ref count */
4413         (*refs)++;
4414
4415         index = btrfs_inode_ref_index(node, ref);
4416         name_len = btrfs_inode_ref_name_len(node, ref);
4417         if (cur + sizeof(*ref) + name_len > total ||
4418             name_len > BTRFS_NAME_LEN) {
4419                 warning("root %llu INODE_REF[%llu %llu] name too long",
4420                         root->objectid, ref_key->objectid, ref_key->offset);
4421
4422                 if (total < cur + sizeof(*ref))
4423                         goto out;
4424                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4425         } else {
4426                 len = name_len;
4427         }
4428
4429         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4430
4431         /* Check root dir ref name */
4432         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4433                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4434                       root->objectid, ref_key->objectid, ref_key->offset,
4435                       namebuf);
4436                 err |= ROOT_DIR_ERROR;
4437         }
4438
4439         /* Find related DIR_INDEX */
4440         key.objectid = ref_key->offset;
4441         key.type = BTRFS_DIR_INDEX_KEY;
4442         key.offset = index;
4443         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4444         err |= ret;
4445
4446         /* Find related dir_item */
4447         key.objectid = ref_key->offset;
4448         key.type = BTRFS_DIR_ITEM_KEY;
4449         key.offset = btrfs_name_hash(namebuf, len);
4450         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4451         err |= ret;
4452
4453         len = sizeof(*ref) + name_len;
4454         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4455         cur += len;
4456         if (cur < total)
4457                 goto next;
4458
4459 out:
4460         return err;
4461 }
4462
4463 /*
4464  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4465  * DIR_ITEM/DIR_INDEX.
4466  *
4467  * @root:       the root of the fs/file tree
4468  * @ref_key:    the key of the INODE_EXTREF
4469  * @refs:       the count of INODE_EXTREF
4470  * @mode:       the st_mode of INODE_ITEM
4471  *
4472  * Return 0 if no error occurred.
4473  */
4474 static int check_inode_extref(struct btrfs_root *root,
4475                               struct btrfs_key *ref_key,
4476                               struct extent_buffer *node, int slot, u64 *refs,
4477                               int mode)
4478 {
4479         struct btrfs_key key;
4480         struct btrfs_inode_extref *extref;
4481         char namebuf[BTRFS_NAME_LEN] = {0};
4482         u32 total;
4483         u32 cur = 0;
4484         u32 len;
4485         u32 name_len;
4486         u64 index;
4487         u64 parent;
4488         int ret;
4489         int err = 0;
4490
4491         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4492         total = btrfs_item_size_nr(node, slot);
4493
4494 next:
4495         /* update inode ref count */
4496         (*refs)++;
4497         name_len = btrfs_inode_extref_name_len(node, extref);
4498         index = btrfs_inode_extref_index(node, extref);
4499         parent = btrfs_inode_extref_parent(node, extref);
4500         if (name_len <= BTRFS_NAME_LEN) {
4501                 len = name_len;
4502         } else {
4503                 len = BTRFS_NAME_LEN;
4504                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4505                         root->objectid, ref_key->objectid, ref_key->offset);
4506         }
4507         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4508
4509         /* Check root dir ref name */
4510         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4511                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4512                       root->objectid, ref_key->objectid, ref_key->offset,
4513                       namebuf);
4514                 err |= ROOT_DIR_ERROR;
4515         }
4516
4517         /* find related dir_index */
4518         key.objectid = parent;
4519         key.type = BTRFS_DIR_INDEX_KEY;
4520         key.offset = index;
4521         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4522         err |= ret;
4523
4524         /* find related dir_item */
4525         key.objectid = parent;
4526         key.type = BTRFS_DIR_ITEM_KEY;
4527         key.offset = btrfs_name_hash(namebuf, len);
4528         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4529         err |= ret;
4530
4531         len = sizeof(*extref) + name_len;
4532         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4533         cur += len;
4534
4535         if (cur < total)
4536                 goto next;
4537
4538         return err;
4539 }
4540
4541 /*
4542  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4543  * DIR_ITEM/DIR_INDEX match.
4544  * Return with @index_ret.
4545  *
4546  * @root:       the root of the fs/file tree
4547  * @key:        the key of the INODE_REF/INODE_EXTREF
4548  * @name:       the name in the INODE_REF/INODE_EXTREF
4549  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4550  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4551  *              value (64)-1 means do not check index
4552  * @ext_ref:    the EXTENDED_IREF feature
4553  *
4554  * Return 0 if no error occurred.
4555  * Return >0 for error bitmap
4556  */
4557 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4558                           char *name, int namelen, u64 *index_ret,
4559                           unsigned int ext_ref)
4560 {
4561         struct btrfs_path path;
4562         struct btrfs_inode_ref *ref;
4563         struct btrfs_inode_extref *extref;
4564         struct extent_buffer *node;
4565         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4566         u32 total;
4567         u32 cur = 0;
4568         u32 len;
4569         u32 ref_namelen;
4570         u64 ref_index;
4571         u64 parent;
4572         u64 dir_id;
4573         int slot;
4574         int ret;
4575
4576         ASSERT(index_ret);
4577
4578         btrfs_init_path(&path);
4579         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4580         if (ret) {
4581                 ret = INODE_REF_MISSING;
4582                 goto extref;
4583         }
4584
4585         node = path.nodes[0];
4586         slot = path.slots[0];
4587
4588         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4589         total = btrfs_item_size_nr(node, slot);
4590
4591         /* Iterate all entry of INODE_REF */
4592         while (cur < total) {
4593                 ret = INODE_REF_MISSING;
4594
4595                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4596                 ref_index = btrfs_inode_ref_index(node, ref);
4597                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4598                         goto next_ref;
4599
4600                 if (cur + sizeof(*ref) + ref_namelen > total ||
4601                     ref_namelen > BTRFS_NAME_LEN) {
4602                         warning("root %llu INODE %s[%llu %llu] name too long",
4603                                 root->objectid,
4604                                 key->type == BTRFS_INODE_REF_KEY ?
4605                                         "REF" : "EXTREF",
4606                                 key->objectid, key->offset);
4607
4608                         if (cur + sizeof(*ref) > total)
4609                                 break;
4610                         len = min_t(u32, total - cur - sizeof(*ref),
4611                                     BTRFS_NAME_LEN);
4612                 } else {
4613                         len = ref_namelen;
4614                 }
4615
4616                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4617                                    len);
4618
4619                 if (len != namelen || strncmp(ref_namebuf, name, len))
4620                         goto next_ref;
4621
4622                 *index_ret = ref_index;
4623                 ret = 0;
4624                 goto out;
4625 next_ref:
4626                 len = sizeof(*ref) + ref_namelen;
4627                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4628                 cur += len;
4629         }
4630
4631 extref:
4632         /* Skip if not support EXTENDED_IREF feature */
4633         if (!ext_ref)
4634                 goto out;
4635
4636         btrfs_release_path(&path);
4637         btrfs_init_path(&path);
4638
4639         dir_id = key->offset;
4640         key->type = BTRFS_INODE_EXTREF_KEY;
4641         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4642
4643         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4644         if (ret) {
4645                 ret = INODE_REF_MISSING;
4646                 goto out;
4647         }
4648
4649         node = path.nodes[0];
4650         slot = path.slots[0];
4651
4652         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4653         cur = 0;
4654         total = btrfs_item_size_nr(node, slot);
4655
4656         /* Iterate all entry of INODE_EXTREF */
4657         while (cur < total) {
4658                 ret = INODE_REF_MISSING;
4659
4660                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4661                 ref_index = btrfs_inode_extref_index(node, extref);
4662                 parent = btrfs_inode_extref_parent(node, extref);
4663                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4664                         goto next_extref;
4665
4666                 if (parent != dir_id)
4667                         goto next_extref;
4668
4669                 if (ref_namelen <= BTRFS_NAME_LEN) {
4670                         len = ref_namelen;
4671                 } else {
4672                         len = BTRFS_NAME_LEN;
4673                         warning("root %llu INODE %s[%llu %llu] name too long",
4674                                 root->objectid,
4675                                 key->type == BTRFS_INODE_REF_KEY ?
4676                                         "REF" : "EXTREF",
4677                                 key->objectid, key->offset);
4678                 }
4679                 read_extent_buffer(node, ref_namebuf,
4680                                    (unsigned long)(extref + 1), len);
4681
4682                 if (len != namelen || strncmp(ref_namebuf, name, len))
4683                         goto next_extref;
4684
4685                 *index_ret = ref_index;
4686                 ret = 0;
4687                 goto out;
4688
4689 next_extref:
4690                 len = sizeof(*extref) + ref_namelen;
4691                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4692                 cur += len;
4693
4694         }
4695 out:
4696         btrfs_release_path(&path);
4697         return ret;
4698 }
4699
4700 /*
4701  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4702  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4703  *
4704  * @root:       the root of the fs/file tree
4705  * @key:        the key of the INODE_REF/INODE_EXTREF
4706  * @size:       the st_size of the INODE_ITEM
4707  * @ext_ref:    the EXTENDED_IREF feature
4708  *
4709  * Return 0 if no error occurred.
4710  */
4711 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4712                           struct extent_buffer *node, int slot, u64 *size,
4713                           unsigned int ext_ref)
4714 {
4715         struct btrfs_dir_item *di;
4716         struct btrfs_inode_item *ii;
4717         struct btrfs_path path;
4718         struct btrfs_key location;
4719         char namebuf[BTRFS_NAME_LEN] = {0};
4720         u32 total;
4721         u32 cur = 0;
4722         u32 len;
4723         u32 name_len;
4724         u32 data_len;
4725         u8 filetype;
4726         u32 mode;
4727         u64 index;
4728         int ret;
4729         int err = 0;
4730
4731         /*
4732          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4733          * ignore index check.
4734          */
4735         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4736
4737         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4738         total = btrfs_item_size_nr(node, slot);
4739
4740         while (cur < total) {
4741                 data_len = btrfs_dir_data_len(node, di);
4742                 if (data_len)
4743                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4744                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4745                               "DIR_ITEM" : "DIR_INDEX",
4746                               key->objectid, key->offset, data_len);
4747
4748                 name_len = btrfs_dir_name_len(node, di);
4749                 if (cur + sizeof(*di) + name_len > total ||
4750                     name_len > BTRFS_NAME_LEN) {
4751                         warning("root %llu %s[%llu %llu] name too long",
4752                                 root->objectid,
4753                                 key->type == BTRFS_DIR_ITEM_KEY ?
4754                                 "DIR_ITEM" : "DIR_INDEX",
4755                                 key->objectid, key->offset);
4756
4757                         if (cur + sizeof(*di) > total)
4758                                 break;
4759                         len = min_t(u32, total - cur - sizeof(*di),
4760                                     BTRFS_NAME_LEN);
4761                 } else {
4762                         len = name_len;
4763                 }
4764                 (*size) += name_len;
4765
4766                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4767                 filetype = btrfs_dir_type(node, di);
4768
4769                 if (key->type == BTRFS_DIR_ITEM_KEY &&
4770                     key->offset != btrfs_name_hash(namebuf, len)) {
4771                         err |= -EIO;
4772                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4773                                 root->objectid, key->objectid, key->offset,
4774                                 namebuf, len, filetype, key->offset,
4775                                 btrfs_name_hash(namebuf, len));
4776                 }
4777
4778                 btrfs_init_path(&path);
4779                 btrfs_dir_item_key_to_cpu(node, di, &location);
4780
4781                 /* Ignore related ROOT_ITEM check */
4782                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4783                         goto next;
4784
4785                 /* Check relative INODE_ITEM(existence/filetype) */
4786                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4787                 if (ret) {
4788                         err |= INODE_ITEM_MISSING;
4789                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4790                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4791                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4792                               key->offset, location.objectid, name_len,
4793                               namebuf, filetype);
4794                         goto next;
4795                 }
4796
4797                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4798                                     struct btrfs_inode_item);
4799                 mode = btrfs_inode_mode(path.nodes[0], ii);
4800
4801                 if (imode_to_type(mode) != filetype) {
4802                         err |= INODE_ITEM_MISMATCH;
4803                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4804                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4805                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4806                               key->offset, name_len, namebuf, filetype);
4807                 }
4808
4809                 /* Check relative INODE_REF/INODE_EXTREF */
4810                 location.type = BTRFS_INODE_REF_KEY;
4811                 location.offset = key->objectid;
4812                 ret = find_inode_ref(root, &location, namebuf, len,
4813                                      &index, ext_ref);
4814                 err |= ret;
4815                 if (ret & INODE_REF_MISSING)
4816                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4817                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4818                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4819                               key->offset, name_len, namebuf, filetype);
4820
4821 next:
4822                 btrfs_release_path(&path);
4823                 len = sizeof(*di) + name_len + data_len;
4824                 di = (struct btrfs_dir_item *)((char *)di + len);
4825                 cur += len;
4826
4827                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4828                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4829                               root->objectid, key->objectid, key->offset);
4830                         break;
4831                 }
4832         }
4833
4834         return err;
4835 }
4836
4837 /*
4838  * Check file extent datasum/hole, update the size of the file extents,
4839  * check and update the last offset of the file extent.
4840  *
4841  * @root:       the root of fs/file tree.
4842  * @fkey:       the key of the file extent.
4843  * @nodatasum:  INODE_NODATASUM feature.
4844  * @size:       the sum of all EXTENT_DATA items size for this inode.
4845  * @end:        the offset of the last extent.
4846  *
4847  * Return 0 if no error occurred.
4848  */
4849 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4850                              struct extent_buffer *node, int slot,
4851                              unsigned int nodatasum, u64 *size, u64 *end)
4852 {
4853         struct btrfs_file_extent_item *fi;
4854         u64 disk_bytenr;
4855         u64 disk_num_bytes;
4856         u64 extent_num_bytes;
4857         u64 extent_offset;
4858         u64 csum_found;         /* In byte size, sectorsize aligned */
4859         u64 search_start;       /* Logical range start we search for csum */
4860         u64 search_len;         /* Logical range len we search for csum */
4861         unsigned int extent_type;
4862         unsigned int is_hole;
4863         int compressed = 0;
4864         int ret;
4865         int err = 0;
4866
4867         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4868
4869         /* Check inline extent */
4870         extent_type = btrfs_file_extent_type(node, fi);
4871         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4872                 struct btrfs_item *e = btrfs_item_nr(slot);
4873                 u32 item_inline_len;
4874
4875                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4876                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4877                 compressed = btrfs_file_extent_compression(node, fi);
4878                 if (extent_num_bytes == 0) {
4879                         error(
4880                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4881                                 root->objectid, fkey->objectid, fkey->offset);
4882                         err |= FILE_EXTENT_ERROR;
4883                 }
4884                 if (!compressed && extent_num_bytes != item_inline_len) {
4885                         error(
4886                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4887                                 root->objectid, fkey->objectid, fkey->offset,
4888                                 extent_num_bytes, item_inline_len);
4889                         err |= FILE_EXTENT_ERROR;
4890                 }
4891                 *end += extent_num_bytes;
4892                 *size += extent_num_bytes;
4893                 return err;
4894         }
4895
4896         /* Check extent type */
4897         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4898                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4899                 err |= FILE_EXTENT_ERROR;
4900                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4901                       root->objectid, fkey->objectid, fkey->offset);
4902                 return err;
4903         }
4904
4905         /* Check REG_EXTENT/PREALLOC_EXTENT */
4906         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4907         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4908         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4909         extent_offset = btrfs_file_extent_offset(node, fi);
4910         compressed = btrfs_file_extent_compression(node, fi);
4911         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4912
4913         /*
4914          * Check EXTENT_DATA csum
4915          *
4916          * For plain (uncompressed) extent, we should only check the range
4917          * we're referring to, as it's possible that part of prealloc extent
4918          * has been written, and has csum:
4919          *
4920          * |<--- Original large preallocated extent A ---->|
4921          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4922          *      No csum                         Has csum
4923          *
4924          * For compressed extent, we should check the whole range.
4925          */
4926         if (!compressed) {
4927                 search_start = disk_bytenr + extent_offset;
4928                 search_len = extent_num_bytes;
4929         } else {
4930                 search_start = disk_bytenr;
4931                 search_len = disk_num_bytes;
4932         }
4933         ret = count_csum_range(root, search_start, search_len, &csum_found);
4934         if (csum_found > 0 && nodatasum) {
4935                 err |= ODD_CSUM_ITEM;
4936                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4937                       root->objectid, fkey->objectid, fkey->offset);
4938         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4939                    !is_hole && (ret < 0 || csum_found < search_len)) {
4940                 err |= CSUM_ITEM_MISSING;
4941                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4942                       root->objectid, fkey->objectid, fkey->offset,
4943                       csum_found, search_len);
4944         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4945                 err |= ODD_CSUM_ITEM;
4946                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4947                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4948         }
4949
4950         /* Check EXTENT_DATA hole */
4951         if (!no_holes && *end != fkey->offset) {
4952                 err |= FILE_EXTENT_ERROR;
4953                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4954                       root->objectid, fkey->objectid, fkey->offset);
4955         }
4956
4957         *end += extent_num_bytes;
4958         if (!is_hole)
4959                 *size += extent_num_bytes;
4960
4961         return err;
4962 }
4963
4964 /*
4965  * Set inode item nbytes to @nbytes
4966  *
4967  * Returns  0     on success
4968  * Returns  != 0  on error
4969  */
4970 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
4971                                       struct btrfs_path *path,
4972                                       u64 ino, u64 nbytes)
4973 {
4974         struct btrfs_trans_handle *trans;
4975         struct btrfs_inode_item *ii;
4976         struct btrfs_key key;
4977         struct btrfs_key research_key;
4978         int err = 0;
4979         int ret;
4980
4981         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
4982
4983         key.objectid = ino;
4984         key.type = BTRFS_INODE_ITEM_KEY;
4985         key.offset = 0;
4986
4987         trans = btrfs_start_transaction(root, 1);
4988         if (IS_ERR(trans)) {
4989                 ret = PTR_ERR(trans);
4990                 err |= ret;
4991                 goto out;
4992         }
4993
4994         btrfs_release_path(path);
4995         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4996         if (ret > 0)
4997                 ret = -ENOENT;
4998         if (ret) {
4999                 err |= ret;
5000                 goto fail;
5001         }
5002
5003         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5004                             struct btrfs_inode_item);
5005         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5006         btrfs_mark_buffer_dirty(path->nodes[0]);
5007 fail:
5008         btrfs_commit_transaction(trans, root);
5009 out:
5010         if (ret)
5011                 error("failed to set nbytes in inode %llu root %llu",
5012                       ino, root->root_key.objectid);
5013         else
5014                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5015                        root->root_key.objectid, nbytes);
5016
5017         /* research path */
5018         btrfs_release_path(path);
5019         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5020         err |= ret;
5021
5022         return err;
5023 }
5024
5025 /*
5026  * Set directory inode isize to @isize.
5027  *
5028  * Returns 0     on success.
5029  * Returns != 0  on error.
5030  */
5031 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5032                                    struct btrfs_path *path,
5033                                    u64 ino, u64 isize)
5034 {
5035         struct btrfs_trans_handle *trans;
5036         struct btrfs_inode_item *ii;
5037         struct btrfs_key key;
5038         struct btrfs_key research_key;
5039         int ret;
5040         int err = 0;
5041
5042         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5043
5044         key.objectid = ino;
5045         key.type = BTRFS_INODE_ITEM_KEY;
5046         key.offset = 0;
5047
5048         trans = btrfs_start_transaction(root, 1);
5049         if (IS_ERR(trans)) {
5050                 ret = PTR_ERR(trans);
5051                 err |= ret;
5052                 goto out;
5053         }
5054
5055         btrfs_release_path(path);
5056         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5057         if (ret > 0)
5058                 ret = -ENOENT;
5059         if (ret) {
5060                 err |= ret;
5061                 goto fail;
5062         }
5063
5064         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5065                             struct btrfs_inode_item);
5066         btrfs_set_inode_size(path->nodes[0], ii, isize);
5067         btrfs_mark_buffer_dirty(path->nodes[0]);
5068 fail:
5069         btrfs_commit_transaction(trans, root);
5070 out:
5071         if (ret)
5072                 error("failed to set isize in inode %llu root %llu",
5073                       ino, root->root_key.objectid);
5074         else
5075                 printf("Set isize in inode %llu root %llu to %llu\n",
5076                        ino, root->root_key.objectid, isize);
5077
5078         btrfs_release_path(path);
5079         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5080         err |= ret;
5081
5082         return err;
5083 }
5084
5085 /*
5086  * Wrapper function for btrfs_add_orphan_item().
5087  *
5088  * Returns 0     on success.
5089  * Returns != 0  on error.
5090  */
5091 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5092                                            struct btrfs_path *path, u64 ino)
5093 {
5094         struct btrfs_trans_handle *trans;
5095         struct btrfs_key research_key;
5096         int ret;
5097         int err = 0;
5098
5099         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5100
5101         trans = btrfs_start_transaction(root, 1);
5102         if (IS_ERR(trans)) {
5103                 ret = PTR_ERR(trans);
5104                 err |= ret;
5105                 goto out;
5106         }
5107
5108         btrfs_release_path(path);
5109         ret = btrfs_add_orphan_item(trans, root, path, ino);
5110         err |= ret;
5111         btrfs_commit_transaction(trans, root);
5112 out:
5113         if (ret)
5114                 error("failed to add inode %llu as orphan item root %llu",
5115                       ino, root->root_key.objectid);
5116         else
5117                 printf("Added inode %llu as orphan item root %llu\n",
5118                        ino, root->root_key.objectid);
5119
5120         btrfs_release_path(path);
5121         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5122         err |= ret;
5123
5124         return err;
5125 }
5126
5127 /*
5128  * Check INODE_ITEM and related ITEMs (the same inode number)
5129  * 1. check link count
5130  * 2. check inode ref/extref
5131  * 3. check dir item/index
5132  *
5133  * @ext_ref:    the EXTENDED_IREF feature
5134  *
5135  * Return 0 if no error occurred.
5136  * Return >0 for error or hit the traversal is done(by error bitmap)
5137  */
5138 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5139                             unsigned int ext_ref)
5140 {
5141         struct extent_buffer *node;
5142         struct btrfs_inode_item *ii;
5143         struct btrfs_key key;
5144         u64 inode_id;
5145         u32 mode;
5146         u64 nlink;
5147         u64 nbytes;
5148         u64 isize;
5149         u64 size = 0;
5150         u64 refs = 0;
5151         u64 extent_end = 0;
5152         u64 extent_size = 0;
5153         unsigned int dir;
5154         unsigned int nodatasum;
5155         int slot;
5156         int ret;
5157         int err = 0;
5158
5159         node = path->nodes[0];
5160         slot = path->slots[0];
5161
5162         btrfs_item_key_to_cpu(node, &key, slot);
5163         inode_id = key.objectid;
5164
5165         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5166                 ret = btrfs_next_item(root, path);
5167                 if (ret > 0)
5168                         err |= LAST_ITEM;
5169                 return err;
5170         }
5171
5172         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5173         isize = btrfs_inode_size(node, ii);
5174         nbytes = btrfs_inode_nbytes(node, ii);
5175         mode = btrfs_inode_mode(node, ii);
5176         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5177         nlink = btrfs_inode_nlink(node, ii);
5178         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5179
5180         while (1) {
5181                 ret = btrfs_next_item(root, path);
5182                 if (ret < 0) {
5183                         /* out will fill 'err' rusing current statistics */
5184                         goto out;
5185                 } else if (ret > 0) {
5186                         err |= LAST_ITEM;
5187                         goto out;
5188                 }
5189
5190                 node = path->nodes[0];
5191                 slot = path->slots[0];
5192                 btrfs_item_key_to_cpu(node, &key, slot);
5193                 if (key.objectid != inode_id)
5194                         goto out;
5195
5196                 switch (key.type) {
5197                 case BTRFS_INODE_REF_KEY:
5198                         ret = check_inode_ref(root, &key, node, slot, &refs,
5199                                               mode);
5200                         err |= ret;
5201                         break;
5202                 case BTRFS_INODE_EXTREF_KEY:
5203                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5204                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5205                                         root->objectid, key.objectid,
5206                                         key.offset);
5207                         ret = check_inode_extref(root, &key, node, slot, &refs,
5208                                                  mode);
5209                         err |= ret;
5210                         break;
5211                 case BTRFS_DIR_ITEM_KEY:
5212                 case BTRFS_DIR_INDEX_KEY:
5213                         if (!dir) {
5214                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5215                                         root->objectid, inode_id,
5216                                         imode_to_type(mode), key.objectid,
5217                                         key.offset);
5218                         }
5219                         ret = check_dir_item(root, &key, node, slot, &size,
5220                                              ext_ref);
5221                         err |= ret;
5222                         break;
5223                 case BTRFS_EXTENT_DATA_KEY:
5224                         if (dir) {
5225                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5226                                         root->objectid, inode_id, key.objectid,
5227                                         key.offset);
5228                         }
5229                         ret = check_file_extent(root, &key, node, slot,
5230                                                 nodatasum, &extent_size,
5231                                                 &extent_end);
5232                         err |= ret;
5233                         break;
5234                 case BTRFS_XATTR_ITEM_KEY:
5235                         break;
5236                 default:
5237                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5238                               key.objectid, key.type, key.offset);
5239                 }
5240         }
5241
5242 out:
5243         /* verify INODE_ITEM nlink/isize/nbytes */
5244         if (dir) {
5245                 if (nlink != 1) {
5246                         err |= LINK_COUNT_ERROR;
5247                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5248                               root->objectid, inode_id, nlink);
5249                 }
5250
5251                 /*
5252                  * Just a warning, as dir inode nbytes is just an
5253                  * instructive value.
5254                  */
5255                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5256                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5257                                 root->objectid, inode_id,
5258                                 root->fs_info->nodesize);
5259                 }
5260
5261                 if (isize != size) {
5262                         if (repair)
5263                                 ret = repair_dir_isize_lowmem(root, path,
5264                                                               inode_id, size);
5265                         if (!repair || ret) {
5266                                 err |= ISIZE_ERROR;
5267                                 error(
5268                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5269                                       root->objectid, inode_id, isize, size);
5270                         }
5271                 }
5272         } else {
5273                 if (nlink != refs) {
5274                         err |= LINK_COUNT_ERROR;
5275                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5276                               root->objectid, inode_id, nlink, refs);
5277                 } else if (!nlink) {
5278                         if (repair)
5279                                 ret = repair_inode_orphan_item_lowmem(root,
5280                                                               path, inode_id);
5281                         if (!repair || ret) {
5282                                 err |= ORPHAN_ITEM;
5283                                 error("root %llu INODE[%llu] is orphan item",
5284                                       root->objectid, inode_id);
5285                         }
5286                 }
5287
5288                 if (!nbytes && !no_holes && extent_end < isize) {
5289                         err |= NBYTES_ERROR;
5290                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5291                               root->objectid, inode_id, isize);
5292                 }
5293
5294                 if (nbytes != extent_size) {
5295                         if (repair)
5296                                 ret = repair_inode_nbytes_lowmem(root, path,
5297                                                          inode_id, extent_size);
5298                         if (!repair || ret) {
5299                                 err |= NBYTES_ERROR;
5300                                 error(
5301         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5302                                       root->objectid, inode_id, nbytes,
5303                                       extent_size);
5304                         }
5305                 }
5306         }
5307
5308         return err;
5309 }
5310
5311 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5312 {
5313         struct btrfs_path path;
5314         struct btrfs_key key;
5315         int err = 0;
5316         int ret;
5317
5318         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5319         key.type = BTRFS_INODE_ITEM_KEY;
5320         key.offset = 0;
5321
5322         /* For root being dropped, we don't need to check first inode */
5323         if (btrfs_root_refs(&root->root_item) == 0 &&
5324             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5325             key.objectid)
5326                 return 0;
5327
5328         btrfs_init_path(&path);
5329
5330         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5331         if (ret < 0)
5332                 goto out;
5333         if (ret > 0) {
5334                 ret = 0;
5335                 err |= INODE_ITEM_MISSING;
5336                 error("first inode item of root %llu is missing",
5337                       root->objectid);
5338         }
5339
5340         err |= check_inode_item(root, &path, ext_ref);
5341         err &= ~LAST_ITEM;
5342         if (err && !ret)
5343                 ret = -EIO;
5344 out:
5345         btrfs_release_path(&path);
5346         return ret;
5347 }
5348
5349 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5350                                                 u64 parent, u64 root)
5351 {
5352         struct rb_node *node;
5353         struct tree_backref *back = NULL;
5354         struct tree_backref match = {
5355                 .node = {
5356                         .is_data = 0,
5357                 },
5358         };
5359
5360         if (parent) {
5361                 match.parent = parent;
5362                 match.node.full_backref = 1;
5363         } else {
5364                 match.root = root;
5365         }
5366
5367         node = rb_search(&rec->backref_tree, &match.node.node,
5368                          (rb_compare_keys)compare_extent_backref, NULL);
5369         if (node)
5370                 back = to_tree_backref(rb_node_to_extent_backref(node));
5371
5372         return back;
5373 }
5374
5375 static struct data_backref *find_data_backref(struct extent_record *rec,
5376                                                 u64 parent, u64 root,
5377                                                 u64 owner, u64 offset,
5378                                                 int found_ref,
5379                                                 u64 disk_bytenr, u64 bytes)
5380 {
5381         struct rb_node *node;
5382         struct data_backref *back = NULL;
5383         struct data_backref match = {
5384                 .node = {
5385                         .is_data = 1,
5386                 },
5387                 .owner = owner,
5388                 .offset = offset,
5389                 .bytes = bytes,
5390                 .found_ref = found_ref,
5391                 .disk_bytenr = disk_bytenr,
5392         };
5393
5394         if (parent) {
5395                 match.parent = parent;
5396                 match.node.full_backref = 1;
5397         } else {
5398                 match.root = root;
5399         }
5400
5401         node = rb_search(&rec->backref_tree, &match.node.node,
5402                          (rb_compare_keys)compare_extent_backref, NULL);
5403         if (node)
5404                 back = to_data_backref(rb_node_to_extent_backref(node));
5405
5406         return back;
5407 }
5408 /*
5409  * Iterate all item on the tree and call check_inode_item() to check.
5410  *
5411  * @root:       the root of the tree to be checked.
5412  * @ext_ref:    the EXTENDED_IREF feature
5413  *
5414  * Return 0 if no error found.
5415  * Return <0 for error.
5416  */
5417 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5418 {
5419         struct btrfs_path path;
5420         struct node_refs nrefs;
5421         struct btrfs_root_item *root_item = &root->root_item;
5422         int ret;
5423         int level;
5424         int err = 0;
5425
5426         /*
5427          * We need to manually check the first inode item(256)
5428          * As the following traversal function will only start from
5429          * the first inode item in the leaf, if inode item(256) is missing
5430          * we will just skip it forever.
5431          */
5432         ret = check_fs_first_inode(root, ext_ref);
5433         if (ret < 0)
5434                 return ret;
5435
5436         memset(&nrefs, 0, sizeof(nrefs));
5437         level = btrfs_header_level(root->node);
5438         btrfs_init_path(&path);
5439
5440         if (btrfs_root_refs(root_item) > 0 ||
5441             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5442                 path.nodes[level] = root->node;
5443                 path.slots[level] = 0;
5444                 extent_buffer_get(root->node);
5445         } else {
5446                 struct btrfs_key key;
5447
5448                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5449                 level = root_item->drop_level;
5450                 path.lowest_level = level;
5451                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5452                 if (ret < 0)
5453                         goto out;
5454                 ret = 0;
5455         }
5456
5457         while (1) {
5458                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5459                 err |= !!ret;
5460
5461                 /* if ret is negative, walk shall stop */
5462                 if (ret < 0) {
5463                         ret = err;
5464                         break;
5465                 }
5466
5467                 ret = walk_up_tree_v2(root, &path, &level);
5468                 if (ret != 0) {
5469                         /* Normal exit, reset ret to err */
5470                         ret = err;
5471                         break;
5472                 }
5473         }
5474
5475 out:
5476         btrfs_release_path(&path);
5477         return ret;
5478 }
5479
5480 /*
5481  * Find the relative ref for root_ref and root_backref.
5482  *
5483  * @root:       the root of the root tree.
5484  * @ref_key:    the key of the root ref.
5485  *
5486  * Return 0 if no error occurred.
5487  */
5488 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5489                           struct extent_buffer *node, int slot)
5490 {
5491         struct btrfs_path path;
5492         struct btrfs_key key;
5493         struct btrfs_root_ref *ref;
5494         struct btrfs_root_ref *backref;
5495         char ref_name[BTRFS_NAME_LEN] = {0};
5496         char backref_name[BTRFS_NAME_LEN] = {0};
5497         u64 ref_dirid;
5498         u64 ref_seq;
5499         u32 ref_namelen;
5500         u64 backref_dirid;
5501         u64 backref_seq;
5502         u32 backref_namelen;
5503         u32 len;
5504         int ret;
5505         int err = 0;
5506
5507         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5508         ref_dirid = btrfs_root_ref_dirid(node, ref);
5509         ref_seq = btrfs_root_ref_sequence(node, ref);
5510         ref_namelen = btrfs_root_ref_name_len(node, ref);
5511
5512         if (ref_namelen <= BTRFS_NAME_LEN) {
5513                 len = ref_namelen;
5514         } else {
5515                 len = BTRFS_NAME_LEN;
5516                 warning("%s[%llu %llu] ref_name too long",
5517                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5518                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5519                         ref_key->offset);
5520         }
5521         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5522
5523         /* Find relative root_ref */
5524         key.objectid = ref_key->offset;
5525         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5526         key.offset = ref_key->objectid;
5527
5528         btrfs_init_path(&path);
5529         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5530         if (ret) {
5531                 err |= ROOT_REF_MISSING;
5532                 error("%s[%llu %llu] couldn't find relative ref",
5533                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5534                       "ROOT_REF" : "ROOT_BACKREF",
5535                       ref_key->objectid, ref_key->offset);
5536                 goto out;
5537         }
5538
5539         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5540                                  struct btrfs_root_ref);
5541         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5542         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5543         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5544
5545         if (backref_namelen <= BTRFS_NAME_LEN) {
5546                 len = backref_namelen;
5547         } else {
5548                 len = BTRFS_NAME_LEN;
5549                 warning("%s[%llu %llu] ref_name too long",
5550                         key.type == BTRFS_ROOT_REF_KEY ?
5551                         "ROOT_REF" : "ROOT_BACKREF",
5552                         key.objectid, key.offset);
5553         }
5554         read_extent_buffer(path.nodes[0], backref_name,
5555                            (unsigned long)(backref + 1), len);
5556
5557         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5558             ref_namelen != backref_namelen ||
5559             strncmp(ref_name, backref_name, len)) {
5560                 err |= ROOT_REF_MISMATCH;
5561                 error("%s[%llu %llu] mismatch relative ref",
5562                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5563                       "ROOT_REF" : "ROOT_BACKREF",
5564                       ref_key->objectid, ref_key->offset);
5565         }
5566 out:
5567         btrfs_release_path(&path);
5568         return err;
5569 }
5570
5571 /*
5572  * Check all fs/file tree in low_memory mode.
5573  *
5574  * 1. for fs tree root item, call check_fs_root_v2()
5575  * 2. for fs tree root ref/backref, call check_root_ref()
5576  *
5577  * Return 0 if no error occurred.
5578  */
5579 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5580 {
5581         struct btrfs_root *tree_root = fs_info->tree_root;
5582         struct btrfs_root *cur_root = NULL;
5583         struct btrfs_path path;
5584         struct btrfs_key key;
5585         struct extent_buffer *node;
5586         unsigned int ext_ref;
5587         int slot;
5588         int ret;
5589         int err = 0;
5590
5591         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5592
5593         btrfs_init_path(&path);
5594         key.objectid = BTRFS_FS_TREE_OBJECTID;
5595         key.offset = 0;
5596         key.type = BTRFS_ROOT_ITEM_KEY;
5597
5598         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5599         if (ret < 0) {
5600                 err = ret;
5601                 goto out;
5602         } else if (ret > 0) {
5603                 err = -ENOENT;
5604                 goto out;
5605         }
5606
5607         while (1) {
5608                 node = path.nodes[0];
5609                 slot = path.slots[0];
5610                 btrfs_item_key_to_cpu(node, &key, slot);
5611                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5612                         goto out;
5613                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5614                     fs_root_objectid(key.objectid)) {
5615                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5616                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5617                                                                        &key);
5618                         } else {
5619                                 key.offset = (u64)-1;
5620                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5621                         }
5622
5623                         if (IS_ERR(cur_root)) {
5624                                 error("Fail to read fs/subvol tree: %lld",
5625                                       key.objectid);
5626                                 err = -EIO;
5627                                 goto next;
5628                         }
5629
5630                         ret = check_fs_root_v2(cur_root, ext_ref);
5631                         err |= ret;
5632
5633                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5634                                 btrfs_free_fs_root(cur_root);
5635                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5636                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5637                         ret = check_root_ref(tree_root, &key, node, slot);
5638                         err |= ret;
5639                 }
5640 next:
5641                 ret = btrfs_next_item(tree_root, &path);
5642                 if (ret > 0)
5643                         goto out;
5644                 if (ret < 0) {
5645                         err = ret;
5646                         goto out;
5647                 }
5648         }
5649
5650 out:
5651         btrfs_release_path(&path);
5652         return err;
5653 }
5654
5655 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5656                           struct cache_tree *root_cache)
5657 {
5658         int ret;
5659
5660         if (!ctx.progress_enabled)
5661                 fprintf(stderr, "checking fs roots\n");
5662         if (check_mode == CHECK_MODE_LOWMEM)
5663                 ret = check_fs_roots_v2(fs_info);
5664         else
5665                 ret = check_fs_roots(fs_info, root_cache);
5666
5667         return ret;
5668 }
5669
5670 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5671 {
5672         struct extent_backref *back, *tmp;
5673         struct tree_backref *tback;
5674         struct data_backref *dback;
5675         u64 found = 0;
5676         int err = 0;
5677
5678         rbtree_postorder_for_each_entry_safe(back, tmp,
5679                                              &rec->backref_tree, node) {
5680                 if (!back->found_extent_tree) {
5681                         err = 1;
5682                         if (!print_errs)
5683                                 goto out;
5684                         if (back->is_data) {
5685                                 dback = to_data_backref(back);
5686                                 fprintf(stderr, "Data backref %llu %s %llu"
5687                                         " owner %llu offset %llu num_refs %lu"
5688                                         " not found in extent tree\n",
5689                                         (unsigned long long)rec->start,
5690                                         back->full_backref ?
5691                                         "parent" : "root",
5692                                         back->full_backref ?
5693                                         (unsigned long long)dback->parent:
5694                                         (unsigned long long)dback->root,
5695                                         (unsigned long long)dback->owner,
5696                                         (unsigned long long)dback->offset,
5697                                         (unsigned long)dback->num_refs);
5698                         } else {
5699                                 tback = to_tree_backref(back);
5700                                 fprintf(stderr, "Tree backref %llu parent %llu"
5701                                         " root %llu not found in extent tree\n",
5702                                         (unsigned long long)rec->start,
5703                                         (unsigned long long)tback->parent,
5704                                         (unsigned long long)tback->root);
5705                         }
5706                 }
5707                 if (!back->is_data && !back->found_ref) {
5708                         err = 1;
5709                         if (!print_errs)
5710                                 goto out;
5711                         tback = to_tree_backref(back);
5712                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5713                                 (unsigned long long)rec->start,
5714                                 back->full_backref ? "parent" : "root",
5715                                 back->full_backref ?
5716                                 (unsigned long long)tback->parent :
5717                                 (unsigned long long)tback->root, back);
5718                 }
5719                 if (back->is_data) {
5720                         dback = to_data_backref(back);
5721                         if (dback->found_ref != dback->num_refs) {
5722                                 err = 1;
5723                                 if (!print_errs)
5724                                         goto out;
5725                                 fprintf(stderr, "Incorrect local backref count"
5726                                         " on %llu %s %llu owner %llu"
5727                                         " offset %llu found %u wanted %u back %p\n",
5728                                         (unsigned long long)rec->start,
5729                                         back->full_backref ?
5730                                         "parent" : "root",
5731                                         back->full_backref ?
5732                                         (unsigned long long)dback->parent:
5733                                         (unsigned long long)dback->root,
5734                                         (unsigned long long)dback->owner,
5735                                         (unsigned long long)dback->offset,
5736                                         dback->found_ref, dback->num_refs, back);
5737                         }
5738                         if (dback->disk_bytenr != rec->start) {
5739                                 err = 1;
5740                                 if (!print_errs)
5741                                         goto out;
5742                                 fprintf(stderr, "Backref disk bytenr does not"
5743                                         " match extent record, bytenr=%llu, "
5744                                         "ref bytenr=%llu\n",
5745                                         (unsigned long long)rec->start,
5746                                         (unsigned long long)dback->disk_bytenr);
5747                         }
5748
5749                         if (dback->bytes != rec->nr) {
5750                                 err = 1;
5751                                 if (!print_errs)
5752                                         goto out;
5753                                 fprintf(stderr, "Backref bytes do not match "
5754                                         "extent backref, bytenr=%llu, ref "
5755                                         "bytes=%llu, backref bytes=%llu\n",
5756                                         (unsigned long long)rec->start,
5757                                         (unsigned long long)rec->nr,
5758                                         (unsigned long long)dback->bytes);
5759                         }
5760                 }
5761                 if (!back->is_data) {
5762                         found += 1;
5763                 } else {
5764                         dback = to_data_backref(back);
5765                         found += dback->found_ref;
5766                 }
5767         }
5768         if (found != rec->refs) {
5769                 err = 1;
5770                 if (!print_errs)
5771                         goto out;
5772                 fprintf(stderr, "Incorrect global backref count "
5773                         "on %llu found %llu wanted %llu\n",
5774                         (unsigned long long)rec->start,
5775                         (unsigned long long)found,
5776                         (unsigned long long)rec->refs);
5777         }
5778 out:
5779         return err;
5780 }
5781
/*
 * rb_free_nodes() callback: free the extent_backref that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
5788
5789 static void free_all_extent_backrefs(struct extent_record *rec)
5790 {
5791         rb_free_nodes(&rec->backref_tree, __free_one_backref);
5792 }
5793
5794 static void free_extent_record_cache(struct cache_tree *extent_cache)
5795 {
5796         struct cache_extent *cache;
5797         struct extent_record *rec;
5798
5799         while (1) {
5800                 cache = first_cache_extent(extent_cache);
5801                 if (!cache)
5802                         break;
5803                 rec = container_of(cache, struct extent_record, cache);
5804                 remove_cache_extent(extent_cache, cache);
5805                 free_all_extent_backrefs(rec);
5806                 free(rec);
5807         }
5808 }
5809
5810 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5811                                  struct extent_record *rec)
5812 {
5813         if (rec->content_checked && rec->owner_ref_checked &&
5814             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5815             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5816             !rec->bad_full_backref && !rec->crossing_stripes &&
5817             !rec->wrong_chunk_type) {
5818                 remove_cache_extent(extent_cache, &rec->cache);
5819                 free_all_extent_backrefs(rec);
5820                 list_del_init(&rec->list);
5821                 free(rec);
5822         }
5823         return 0;
5824 }
5825
5826 static int check_owner_ref(struct btrfs_root *root,
5827                             struct extent_record *rec,
5828                             struct extent_buffer *buf)
5829 {
5830         struct extent_backref *node, *tmp;
5831         struct tree_backref *back;
5832         struct btrfs_root *ref_root;
5833         struct btrfs_key key;
5834         struct btrfs_path path;
5835         struct extent_buffer *parent;
5836         int level;
5837         int found = 0;
5838         int ret;
5839
5840         rbtree_postorder_for_each_entry_safe(node, tmp,
5841                                              &rec->backref_tree, node) {
5842                 if (node->is_data)
5843                         continue;
5844                 if (!node->found_ref)
5845                         continue;
5846                 if (node->full_backref)
5847                         continue;
5848                 back = to_tree_backref(node);
5849                 if (btrfs_header_owner(buf) == back->root)
5850                         return 0;
5851         }
5852         BUG_ON(rec->is_root);
5853
5854         /* try to find the block by search corresponding fs tree */
5855         key.objectid = btrfs_header_owner(buf);
5856         key.type = BTRFS_ROOT_ITEM_KEY;
5857         key.offset = (u64)-1;
5858
5859         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5860         if (IS_ERR(ref_root))
5861                 return 1;
5862
5863         level = btrfs_header_level(buf);
5864         if (level == 0)
5865                 btrfs_item_key_to_cpu(buf, &key, 0);
5866         else
5867                 btrfs_node_key_to_cpu(buf, &key, 0);
5868
5869         btrfs_init_path(&path);
5870         path.lowest_level = level + 1;
5871         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5872         if (ret < 0)
5873                 return 0;
5874
5875         parent = path.nodes[level + 1];
5876         if (parent && buf->start == btrfs_node_blockptr(parent,
5877                                                         path.slots[level + 1]))
5878                 found = 1;
5879
5880         btrfs_release_path(&path);
5881         return found ? 0 : 1;
5882 }
5883
5884 static int is_extent_tree_record(struct extent_record *rec)
5885 {
5886         struct extent_backref *node, *tmp;
5887         struct tree_backref *back;
5888         int is_extent = 0;
5889
5890         rbtree_postorder_for_each_entry_safe(node, tmp,
5891                                              &rec->backref_tree, node) {
5892                 if (node->is_data)
5893                         return 0;
5894                 back = to_tree_backref(node);
5895                 if (node->full_backref)
5896                         return 0;
5897                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5898                         is_extent = 1;
5899         }
5900         return is_extent;
5901 }
5902
5903
5904 static int record_bad_block_io(struct btrfs_fs_info *info,
5905                                struct cache_tree *extent_cache,
5906                                u64 start, u64 len)
5907 {
5908         struct extent_record *rec;
5909         struct cache_extent *cache;
5910         struct btrfs_key key;
5911
5912         cache = lookup_cache_extent(extent_cache, start, len);
5913         if (!cache)
5914                 return 0;
5915
5916         rec = container_of(cache, struct extent_record, cache);
5917         if (!is_extent_tree_record(rec))
5918                 return 0;
5919
5920         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5921         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5922 }
5923
5924 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5925                        struct extent_buffer *buf, int slot)
5926 {
5927         if (btrfs_header_level(buf)) {
5928                 struct btrfs_key_ptr ptr1, ptr2;
5929
5930                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5931                                    sizeof(struct btrfs_key_ptr));
5932                 read_extent_buffer(buf, &ptr2,
5933                                    btrfs_node_key_ptr_offset(slot + 1),
5934                                    sizeof(struct btrfs_key_ptr));
5935                 write_extent_buffer(buf, &ptr1,
5936                                     btrfs_node_key_ptr_offset(slot + 1),
5937                                     sizeof(struct btrfs_key_ptr));
5938                 write_extent_buffer(buf, &ptr2,
5939                                     btrfs_node_key_ptr_offset(slot),
5940                                     sizeof(struct btrfs_key_ptr));
5941                 if (slot == 0) {
5942                         struct btrfs_disk_key key;
5943                         btrfs_node_key(buf, &key, 0);
5944                         btrfs_fixup_low_keys(root, path, &key,
5945                                              btrfs_header_level(buf) + 1);
5946                 }
5947         } else {
5948                 struct btrfs_item *item1, *item2;
5949                 struct btrfs_key k1, k2;
5950                 char *item1_data, *item2_data;
5951                 u32 item1_offset, item2_offset, item1_size, item2_size;
5952
5953                 item1 = btrfs_item_nr(slot);
5954                 item2 = btrfs_item_nr(slot + 1);
5955                 btrfs_item_key_to_cpu(buf, &k1, slot);
5956                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5957                 item1_offset = btrfs_item_offset(buf, item1);
5958                 item2_offset = btrfs_item_offset(buf, item2);
5959                 item1_size = btrfs_item_size(buf, item1);
5960                 item2_size = btrfs_item_size(buf, item2);
5961
5962                 item1_data = malloc(item1_size);
5963                 if (!item1_data)
5964                         return -ENOMEM;
5965                 item2_data = malloc(item2_size);
5966                 if (!item2_data) {
5967                         free(item1_data);
5968                         return -ENOMEM;
5969                 }
5970
5971                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5972                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5973
5974                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5975                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5976                 free(item1_data);
5977                 free(item2_data);
5978
5979                 btrfs_set_item_offset(buf, item1, item2_offset);
5980                 btrfs_set_item_offset(buf, item2, item1_offset);
5981                 btrfs_set_item_size(buf, item1, item2_size);
5982                 btrfs_set_item_size(buf, item2, item1_size);
5983
5984                 path->slots[0] = slot;
5985                 btrfs_set_item_key_unsafe(root, path, &k2);
5986                 path->slots[0] = slot + 1;
5987                 btrfs_set_item_key_unsafe(root, path, &k1);
5988         }
5989         return 0;
5990 }
5991
5992 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5993 {
5994         struct extent_buffer *buf;
5995         struct btrfs_key k1, k2;
5996         int i;
5997         int level = path->lowest_level;
5998         int ret = -EIO;
5999
6000         buf = path->nodes[level];
6001         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6002                 if (level) {
6003                         btrfs_node_key_to_cpu(buf, &k1, i);
6004                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6005                 } else {
6006                         btrfs_item_key_to_cpu(buf, &k1, i);
6007                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6008                 }
6009                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6010                         continue;
6011                 ret = swap_values(root, path, buf, i);
6012                 if (ret)
6013                         break;
6014                 btrfs_mark_buffer_dirty(buf);
6015                 i = 0;
6016         }
6017         return ret;
6018 }
6019
6020 static int delete_bogus_item(struct btrfs_root *root,
6021                              struct btrfs_path *path,
6022                              struct extent_buffer *buf, int slot)
6023 {
6024         struct btrfs_key key;
6025         int nritems = btrfs_header_nritems(buf);
6026
6027         btrfs_item_key_to_cpu(buf, &key, slot);
6028
6029         /* These are all the keys we can deal with missing. */
6030         if (key.type != BTRFS_DIR_INDEX_KEY &&
6031             key.type != BTRFS_EXTENT_ITEM_KEY &&
6032             key.type != BTRFS_METADATA_ITEM_KEY &&
6033             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6034             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6035                 return -1;
6036
6037         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6038                (unsigned long long)key.objectid, key.type,
6039                (unsigned long long)key.offset, slot, buf->start);
6040         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6041                               btrfs_item_nr_offset(slot + 1),
6042                               sizeof(struct btrfs_item) *
6043                               (nritems - slot - 1));
6044         btrfs_set_header_nritems(buf, nritems - 1);
6045         if (slot == 0) {
6046                 struct btrfs_disk_key disk_key;
6047
6048                 btrfs_item_key(buf, &disk_key, 0);
6049                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6050         }
6051         btrfs_mark_buffer_dirty(buf);
6052         return 0;
6053 }
6054
6055 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6056 {
6057         struct extent_buffer *buf;
6058         int i;
6059         int ret = 0;
6060
6061         /* We should only get this for leaves */
6062         BUG_ON(path->lowest_level);
6063         buf = path->nodes[0];
6064 again:
6065         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6066                 unsigned int shift = 0, offset;
6067
6068                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6069                     BTRFS_LEAF_DATA_SIZE(root)) {
6070                         if (btrfs_item_end_nr(buf, i) >
6071                             BTRFS_LEAF_DATA_SIZE(root)) {
6072                                 ret = delete_bogus_item(root, path, buf, i);
6073                                 if (!ret)
6074                                         goto again;
6075                                 fprintf(stderr, "item is off the end of the "
6076                                         "leaf, can't fix\n");
6077                                 ret = -EIO;
6078                                 break;
6079                         }
6080                         shift = BTRFS_LEAF_DATA_SIZE(root) -
6081                                 btrfs_item_end_nr(buf, i);
6082                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6083                            btrfs_item_offset_nr(buf, i - 1)) {
6084                         if (btrfs_item_end_nr(buf, i) >
6085                             btrfs_item_offset_nr(buf, i - 1)) {
6086                                 ret = delete_bogus_item(root, path, buf, i);
6087                                 if (!ret)
6088                                         goto again;
6089                                 fprintf(stderr, "items overlap, can't fix\n");
6090                                 ret = -EIO;
6091                                 break;
6092                         }
6093                         shift = btrfs_item_offset_nr(buf, i - 1) -
6094                                 btrfs_item_end_nr(buf, i);
6095                 }
6096                 if (!shift)
6097                         continue;
6098
6099                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6100                        i, shift, (unsigned long long)buf->start);
6101                 offset = btrfs_item_offset_nr(buf, i);
6102                 memmove_extent_buffer(buf,
6103                                       btrfs_leaf_data(buf) + offset + shift,
6104                                       btrfs_leaf_data(buf) + offset,
6105                                       btrfs_item_size_nr(buf, i));
6106                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6107                                       offset + shift);
6108                 btrfs_mark_buffer_dirty(buf);
6109         }
6110
6111         /*
6112          * We may have moved things, in which case we want to exit so we don't
6113          * write those changes out.  Once we have proper abort functionality in
6114          * progs this can be changed to something nicer.
6115          */
6116         BUG_ON(ret);
6117         return ret;
6118 }
6119
6120 /*
6121  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6122  * then just return -EIO.
6123  */
6124 static int try_to_fix_bad_block(struct btrfs_root *root,
6125                                 struct extent_buffer *buf,
6126                                 enum btrfs_tree_block_status status)
6127 {
6128         struct btrfs_trans_handle *trans;
6129         struct ulist *roots;
6130         struct ulist_node *node;
6131         struct btrfs_root *search_root;
6132         struct btrfs_path path;
6133         struct ulist_iterator iter;
6134         struct btrfs_key root_key, key;
6135         int ret;
6136
6137         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6138             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6139                 return -EIO;
6140
6141         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6142         if (ret)
6143                 return -EIO;
6144
6145         btrfs_init_path(&path);
6146         ULIST_ITER_INIT(&iter);
6147         while ((node = ulist_next(roots, &iter))) {
6148                 root_key.objectid = node->val;
6149                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6150                 root_key.offset = (u64)-1;
6151
6152                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6153                 if (IS_ERR(root)) {
6154                         ret = -EIO;
6155                         break;
6156                 }
6157
6158
6159                 trans = btrfs_start_transaction(search_root, 0);
6160                 if (IS_ERR(trans)) {
6161                         ret = PTR_ERR(trans);
6162                         break;
6163                 }
6164
6165                 path.lowest_level = btrfs_header_level(buf);
6166                 path.skip_check_block = 1;
6167                 if (path.lowest_level)
6168                         btrfs_node_key_to_cpu(buf, &key, 0);
6169                 else
6170                         btrfs_item_key_to_cpu(buf, &key, 0);
6171                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6172                 if (ret) {
6173                         ret = -EIO;
6174                         btrfs_commit_transaction(trans, search_root);
6175                         break;
6176                 }
6177                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6178                         ret = fix_key_order(search_root, &path);
6179                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6180                         ret = fix_item_offset(search_root, &path);
6181                 if (ret) {
6182                         btrfs_commit_transaction(trans, search_root);
6183                         break;
6184                 }
6185                 btrfs_release_path(&path);
6186                 btrfs_commit_transaction(trans, search_root);
6187         }
6188         ulist_free(roots);
6189         btrfs_release_path(&path);
6190         return ret;
6191 }
6192
6193 static int check_block(struct btrfs_root *root,
6194                        struct cache_tree *extent_cache,
6195                        struct extent_buffer *buf, u64 flags)
6196 {
6197         struct extent_record *rec;
6198         struct cache_extent *cache;
6199         struct btrfs_key key;
6200         enum btrfs_tree_block_status status;
6201         int ret = 0;
6202         int level;
6203
6204         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6205         if (!cache)
6206                 return 1;
6207         rec = container_of(cache, struct extent_record, cache);
6208         rec->generation = btrfs_header_generation(buf);
6209
6210         level = btrfs_header_level(buf);
6211         if (btrfs_header_nritems(buf) > 0) {
6212
6213                 if (level == 0)
6214                         btrfs_item_key_to_cpu(buf, &key, 0);
6215                 else
6216                         btrfs_node_key_to_cpu(buf, &key, 0);
6217
6218                 rec->info_objectid = key.objectid;
6219         }
6220         rec->info_level = level;
6221
6222         if (btrfs_is_leaf(buf))
6223                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6224         else
6225                 status = btrfs_check_node(root, &rec->parent_key, buf);
6226
6227         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6228                 if (repair)
6229                         status = try_to_fix_bad_block(root, buf, status);
6230                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6231                         ret = -EIO;
6232                         fprintf(stderr, "bad block %llu\n",
6233                                 (unsigned long long)buf->start);
6234                 } else {
6235                         /*
6236                          * Signal to callers we need to start the scan over
6237                          * again since we'll have cowed blocks.
6238                          */
6239                         ret = -EAGAIN;
6240                 }
6241         } else {
6242                 rec->content_checked = 1;
6243                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6244                         rec->owner_ref_checked = 1;
6245                 else {
6246                         ret = check_owner_ref(root, rec, buf);
6247                         if (!ret)
6248                                 rec->owner_ref_checked = 1;
6249                 }
6250         }
6251         if (!ret)
6252                 maybe_free_extent_rec(extent_cache, rec);
6253         return ret;
6254 }
6255
#if 0
/*
 * Compiled out.  Linear lookup of a tree backref on the rec->backrefs list.
 *
 * With @parent non-zero only full backrefs are considered and matched by
 * parent; otherwise only non-full backrefs are considered and matched by
 * @root.  Data backrefs are always skipped.  Returns NULL when nothing
 * matches.
 *
 * NOTE(review): add_tree_backref() calls a function of this name, so an
 * active definition presumably lives elsewhere in the file.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs; pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;

		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && back->parent == parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
6285
6286 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6287                                                 u64 parent, u64 root)
6288 {
6289         struct tree_backref *ref = malloc(sizeof(*ref));
6290
6291         if (!ref)
6292                 return NULL;
6293         memset(&ref->node, 0, sizeof(ref->node));
6294         if (parent > 0) {
6295                 ref->parent = parent;
6296                 ref->node.full_backref = 1;
6297         } else {
6298                 ref->root = root;
6299                 ref->node.full_backref = 0;
6300         }
6301
6302         return ref;
6303 }
6304
#if 0
/*
 * Compiled out.  Linear lookup of a data backref on the rec->backrefs list.
 *
 * With @parent non-zero only full backrefs are considered and matched by
 * parent.  Otherwise a non-full backref must match @root, @owner and
 * @offset; in addition, when both @found_ref and the candidate's found_ref
 * are set, a candidate whose bytes or disk_bytenr differ from @bytes /
 * @disk_bytenr is skipped.  Returns NULL when nothing matches.
 *
 * NOTE(review): add_data_backref() calls a function of this name, so an
 * active definition presumably lives elsewhere in the file.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs; pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;

		back = to_data_backref(node);
		if (parent > 0) {
			if (node->full_backref && back->parent == parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		     back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
6343
6344 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6345                                                 u64 parent, u64 root,
6346                                                 u64 owner, u64 offset,
6347                                                 u64 max_size)
6348 {
6349         struct data_backref *ref = malloc(sizeof(*ref));
6350
6351         if (!ref)
6352                 return NULL;
6353         memset(&ref->node, 0, sizeof(ref->node));
6354         ref->node.is_data = 1;
6355
6356         if (parent > 0) {
6357                 ref->parent = parent;
6358                 ref->owner = 0;
6359                 ref->offset = 0;
6360                 ref->node.full_backref = 1;
6361         } else {
6362                 ref->root = root;
6363                 ref->owner = owner;
6364                 ref->offset = offset;
6365                 ref->node.full_backref = 0;
6366         }
6367         ref->bytes = max_size;
6368         ref->found_ref = 0;
6369         ref->num_refs = 0;
6370         if (max_size > rec->max_size)
6371                 rec->max_size = max_size;
6372         return ref;
6373 }
6374
6375 /* Check if the type of extent matches with its chunk */
6376 static void check_extent_type(struct extent_record *rec)
6377 {
6378         struct btrfs_block_group_cache *bg_cache;
6379
6380         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6381         if (!bg_cache)
6382                 return;
6383
6384         /* data extent, check chunk directly*/
6385         if (!rec->metadata) {
6386                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6387                         rec->wrong_chunk_type = 1;
6388                 return;
6389         }
6390
6391         /* metadata extent, check the obvious case first */
6392         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6393                                  BTRFS_BLOCK_GROUP_METADATA))) {
6394                 rec->wrong_chunk_type = 1;
6395                 return;
6396         }
6397
6398         /*
6399          * Check SYSTEM extent, as it's also marked as metadata, we can only
6400          * make sure it's a SYSTEM extent by its backref
6401          */
6402         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6403                 struct extent_backref *node;
6404                 struct tree_backref *tback;
6405                 u64 bg_type;
6406
6407                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6408                 if (node->is_data) {
6409                         /* tree block shouldn't have data backref */
6410                         rec->wrong_chunk_type = 1;
6411                         return;
6412                 }
6413                 tback = container_of(node, struct tree_backref, node);
6414
6415                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6416                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6417                 else
6418                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6419                 if (!(bg_cache->flags & bg_type))
6420                         rec->wrong_chunk_type = 1;
6421         }
6422 }
6423
6424 /*
6425  * Allocate a new extent record, fill default values from @tmpl and insert int
6426  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6427  * the cache, otherwise it fails.
6428  */
6429 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6430                 struct extent_record *tmpl)
6431 {
6432         struct extent_record *rec;
6433         int ret = 0;
6434
6435         BUG_ON(tmpl->max_size == 0);
6436         rec = malloc(sizeof(*rec));
6437         if (!rec)
6438                 return -ENOMEM;
6439         rec->start = tmpl->start;
6440         rec->max_size = tmpl->max_size;
6441         rec->nr = max(tmpl->nr, tmpl->max_size);
6442         rec->found_rec = tmpl->found_rec;
6443         rec->content_checked = tmpl->content_checked;
6444         rec->owner_ref_checked = tmpl->owner_ref_checked;
6445         rec->num_duplicates = 0;
6446         rec->metadata = tmpl->metadata;
6447         rec->flag_block_full_backref = FLAG_UNSET;
6448         rec->bad_full_backref = 0;
6449         rec->crossing_stripes = 0;
6450         rec->wrong_chunk_type = 0;
6451         rec->is_root = tmpl->is_root;
6452         rec->refs = tmpl->refs;
6453         rec->extent_item_refs = tmpl->extent_item_refs;
6454         rec->parent_generation = tmpl->parent_generation;
6455         INIT_LIST_HEAD(&rec->backrefs);
6456         INIT_LIST_HEAD(&rec->dups);
6457         INIT_LIST_HEAD(&rec->list);
6458         rec->backref_tree = RB_ROOT;
6459         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6460         rec->cache.start = tmpl->start;
6461         rec->cache.size = tmpl->nr;
6462         ret = insert_cache_extent(extent_cache, &rec->cache);
6463         if (ret) {
6464                 free(rec);
6465                 return ret;
6466         }
6467         bytes_used += rec->nr;
6468
6469         if (tmpl->metadata)
6470                 rec->crossing_stripes = check_crossing_stripes(global_info,
6471                                 rec->start, global_info->nodesize);
6472         check_extent_type(rec);
6473         return ret;
6474 }
6475
6476 /*
6477  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6478  * some are hints:
6479  * - refs              - if found, increase refs
6480  * - is_root           - if found, set
6481  * - content_checked   - if found, set
6482  * - owner_ref_checked - if found, set
6483  *
6484  * If not found, create a new one, initialize and insert.
6485  */
6486 static int add_extent_rec(struct cache_tree *extent_cache,
6487                 struct extent_record *tmpl)
6488 {
6489         struct extent_record *rec;
6490         struct cache_extent *cache;
6491         int ret = 0;
6492         int dup = 0;
6493
6494         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6495         if (cache) {
6496                 rec = container_of(cache, struct extent_record, cache);
6497                 if (tmpl->refs)
6498                         rec->refs++;
6499                 if (rec->nr == 1)
6500                         rec->nr = max(tmpl->nr, tmpl->max_size);
6501
6502                 /*
6503                  * We need to make sure to reset nr to whatever the extent
6504                  * record says was the real size, this way we can compare it to
6505                  * the backrefs.
6506                  */
6507                 if (tmpl->found_rec) {
6508                         if (tmpl->start != rec->start || rec->found_rec) {
6509                                 struct extent_record *tmp;
6510
6511                                 dup = 1;
6512                                 if (list_empty(&rec->list))
6513                                         list_add_tail(&rec->list,
6514                                                       &duplicate_extents);
6515
6516                                 /*
6517                                  * We have to do this song and dance in case we
6518                                  * find an extent record that falls inside of
6519                                  * our current extent record but does not have
6520                                  * the same objectid.
6521                                  */
6522                                 tmp = malloc(sizeof(*tmp));
6523                                 if (!tmp)
6524                                         return -ENOMEM;
6525                                 tmp->start = tmpl->start;
6526                                 tmp->max_size = tmpl->max_size;
6527                                 tmp->nr = tmpl->nr;
6528                                 tmp->found_rec = 1;
6529                                 tmp->metadata = tmpl->metadata;
6530                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6531                                 INIT_LIST_HEAD(&tmp->list);
6532                                 list_add_tail(&tmp->list, &rec->dups);
6533                                 rec->num_duplicates++;
6534                         } else {
6535                                 rec->nr = tmpl->nr;
6536                                 rec->found_rec = 1;
6537                         }
6538                 }
6539
6540                 if (tmpl->extent_item_refs && !dup) {
6541                         if (rec->extent_item_refs) {
6542                                 fprintf(stderr, "block %llu rec "
6543                                         "extent_item_refs %llu, passed %llu\n",
6544                                         (unsigned long long)tmpl->start,
6545                                         (unsigned long long)
6546                                                         rec->extent_item_refs,
6547                                         (unsigned long long)tmpl->extent_item_refs);
6548                         }
6549                         rec->extent_item_refs = tmpl->extent_item_refs;
6550                 }
6551                 if (tmpl->is_root)
6552                         rec->is_root = 1;
6553                 if (tmpl->content_checked)
6554                         rec->content_checked = 1;
6555                 if (tmpl->owner_ref_checked)
6556                         rec->owner_ref_checked = 1;
6557                 memcpy(&rec->parent_key, &tmpl->parent_key,
6558                                 sizeof(tmpl->parent_key));
6559                 if (tmpl->parent_generation)
6560                         rec->parent_generation = tmpl->parent_generation;
6561                 if (rec->max_size < tmpl->max_size)
6562                         rec->max_size = tmpl->max_size;
6563
6564                 /*
6565                  * A metadata extent can't cross stripe_len boundary, otherwise
6566                  * kernel scrub won't be able to handle it.
6567                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6568                  * it.
6569                  */
6570                 if (tmpl->metadata)
6571                         rec->crossing_stripes = check_crossing_stripes(
6572                                         global_info, rec->start,
6573                                         global_info->nodesize);
6574                 check_extent_type(rec);
6575                 maybe_free_extent_rec(extent_cache, rec);
6576                 return ret;
6577         }
6578
6579         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6580
6581         return ret;
6582 }
6583
6584 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6585                             u64 parent, u64 root, int found_ref)
6586 {
6587         struct extent_record *rec;
6588         struct tree_backref *back;
6589         struct cache_extent *cache;
6590         int ret;
6591         bool insert = false;
6592
6593         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6594         if (!cache) {
6595                 struct extent_record tmpl;
6596
6597                 memset(&tmpl, 0, sizeof(tmpl));
6598                 tmpl.start = bytenr;
6599                 tmpl.nr = 1;
6600                 tmpl.metadata = 1;
6601                 tmpl.max_size = 1;
6602
6603                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6604                 if (ret)
6605                         return ret;
6606
6607                 /* really a bug in cache_extent implement now */
6608                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6609                 if (!cache)
6610                         return -ENOENT;
6611         }
6612
6613         rec = container_of(cache, struct extent_record, cache);
6614         if (rec->start != bytenr) {
6615                 /*
6616                  * Several cause, from unaligned bytenr to over lapping extents
6617                  */
6618                 return -EEXIST;
6619         }
6620
6621         back = find_tree_backref(rec, parent, root);
6622         if (!back) {
6623                 back = alloc_tree_backref(rec, parent, root);
6624                 if (!back)
6625                         return -ENOMEM;
6626                 insert = true;
6627         }
6628
6629         if (found_ref) {
6630                 if (back->node.found_ref) {
6631                         fprintf(stderr, "Extent back ref already exists "
6632                                 "for %llu parent %llu root %llu \n",
6633                                 (unsigned long long)bytenr,
6634                                 (unsigned long long)parent,
6635                                 (unsigned long long)root);
6636                 }
6637                 back->node.found_ref = 1;
6638         } else {
6639                 if (back->node.found_extent_tree) {
6640                         fprintf(stderr, "Extent back ref already exists "
6641                                 "for %llu parent %llu root %llu \n",
6642                                 (unsigned long long)bytenr,
6643                                 (unsigned long long)parent,
6644                                 (unsigned long long)root);
6645                 }
6646                 back->node.found_extent_tree = 1;
6647         }
6648         if (insert)
6649                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6650                         compare_extent_backref));
6651         check_extent_type(rec);
6652         maybe_free_extent_rec(extent_cache, rec);
6653         return 0;
6654 }
6655
6656 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6657                             u64 parent, u64 root, u64 owner, u64 offset,
6658                             u32 num_refs, int found_ref, u64 max_size)
6659 {
6660         struct extent_record *rec;
6661         struct data_backref *back;
6662         struct cache_extent *cache;
6663         int ret;
6664         bool insert = false;
6665
6666         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6667         if (!cache) {
6668                 struct extent_record tmpl;
6669
6670                 memset(&tmpl, 0, sizeof(tmpl));
6671                 tmpl.start = bytenr;
6672                 tmpl.nr = 1;
6673                 tmpl.max_size = max_size;
6674
6675                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6676                 if (ret)
6677                         return ret;
6678
6679                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6680                 if (!cache)
6681                         abort();
6682         }
6683
6684         rec = container_of(cache, struct extent_record, cache);
6685         if (rec->max_size < max_size)
6686                 rec->max_size = max_size;
6687
6688         /*
6689          * If found_ref is set then max_size is the real size and must match the
6690          * existing refs.  So if we have already found a ref then we need to
6691          * make sure that this ref matches the existing one, otherwise we need
6692          * to add a new backref so we can notice that the backrefs don't match
6693          * and we need to figure out who is telling the truth.  This is to
6694          * account for that awful fsync bug I introduced where we'd end up with
6695          * a btrfs_file_extent_item that would have its length include multiple
6696          * prealloc extents or point inside of a prealloc extent.
6697          */
6698         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6699                                  bytenr, max_size);
6700         if (!back) {
6701                 back = alloc_data_backref(rec, parent, root, owner, offset,
6702                                           max_size);
6703                 BUG_ON(!back);
6704                 insert = true;
6705         }
6706
6707         if (found_ref) {
6708                 BUG_ON(num_refs != 1);
6709                 if (back->node.found_ref)
6710                         BUG_ON(back->bytes != max_size);
6711                 back->node.found_ref = 1;
6712                 back->found_ref += 1;
6713                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6714                         back->bytes = max_size;
6715                         back->disk_bytenr = bytenr;
6716
6717                         /* Need to reinsert if not already in the tree */
6718                         if (!insert) {
6719                                 rb_erase(&back->node.node, &rec->backref_tree);
6720                                 insert = true;
6721                         }
6722                 }
6723                 rec->refs += 1;
6724                 rec->content_checked = 1;
6725                 rec->owner_ref_checked = 1;
6726         } else {
6727                 if (back->node.found_extent_tree) {
6728                         fprintf(stderr, "Extent back ref already exists "
6729                                 "for %llu parent %llu root %llu "
6730                                 "owner %llu offset %llu num_refs %lu\n",
6731                                 (unsigned long long)bytenr,
6732                                 (unsigned long long)parent,
6733                                 (unsigned long long)root,
6734                                 (unsigned long long)owner,
6735                                 (unsigned long long)offset,
6736                                 (unsigned long)num_refs);
6737                 }
6738                 back->num_refs = num_refs;
6739                 back->node.found_extent_tree = 1;
6740         }
6741         if (insert)
6742                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6743                         compare_extent_backref));
6744
6745         maybe_free_extent_rec(extent_cache, rec);
6746         return 0;
6747 }
6748
6749 static int add_pending(struct cache_tree *pending,
6750                        struct cache_tree *seen, u64 bytenr, u32 size)
6751 {
6752         int ret;
6753         ret = add_cache_extent(seen, bytenr, size);
6754         if (ret)
6755                 return ret;
6756         add_cache_extent(pending, bytenr, size);
6757         return 0;
6758 }
6759
6760 static int pick_next_pending(struct cache_tree *pending,
6761                         struct cache_tree *reada,
6762                         struct cache_tree *nodes,
6763                         u64 last, struct block_info *bits, int bits_nr,
6764                         int *reada_bits)
6765 {
6766         unsigned long node_start = last;
6767         struct cache_extent *cache;
6768         int ret;
6769
6770         cache = search_cache_extent(reada, 0);
6771         if (cache) {
6772                 bits[0].start = cache->start;
6773                 bits[0].size = cache->size;
6774                 *reada_bits = 1;
6775                 return 1;
6776         }
6777         *reada_bits = 0;
6778         if (node_start > 32768)
6779                 node_start -= 32768;
6780
6781         cache = search_cache_extent(nodes, node_start);
6782         if (!cache)
6783                 cache = search_cache_extent(nodes, 0);
6784
6785         if (!cache) {
6786                  cache = search_cache_extent(pending, 0);
6787                  if (!cache)
6788                          return 0;
6789                  ret = 0;
6790                  do {
6791                          bits[ret].start = cache->start;
6792                          bits[ret].size = cache->size;
6793                          cache = next_cache_extent(cache);
6794                          ret++;
6795                  } while (cache && ret < bits_nr);
6796                  return ret;
6797         }
6798
6799         ret = 0;
6800         do {
6801                 bits[ret].start = cache->start;
6802                 bits[ret].size = cache->size;
6803                 cache = next_cache_extent(cache);
6804                 ret++;
6805         } while (cache && ret < bits_nr);
6806
6807         if (bits_nr - ret > 8) {
6808                 u64 lookup = bits[0].start + bits[0].size;
6809                 struct cache_extent *next;
6810                 next = search_cache_extent(pending, lookup);
6811                 while(next) {
6812                         if (next->start - lookup > 32768)
6813                                 break;
6814                         bits[ret].start = next->start;
6815                         bits[ret].size = next->size;
6816                         lookup = next->start + next->size;
6817                         ret++;
6818                         if (ret == bits_nr)
6819                                 break;
6820                         next = next_cache_extent(next);
6821                         if (!next)
6822                                 break;
6823                 }
6824         }
6825         return ret;
6826 }
6827
6828 static void free_chunk_record(struct cache_extent *cache)
6829 {
6830         struct chunk_record *rec;
6831
6832         rec = container_of(cache, struct chunk_record, cache);
6833         list_del_init(&rec->list);
6834         list_del_init(&rec->dextents);
6835         free(rec);
6836 }
6837
6838 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6839 {
6840         cache_tree_free_extents(chunk_cache, free_chunk_record);
6841 }
6842
6843 static void free_device_record(struct rb_node *node)
6844 {
6845         struct device_record *rec;
6846
6847         rec = container_of(node, struct device_record, node);
6848         free(rec);
6849 }
6850
6851 FREE_RB_BASED_TREE(device_cache, free_device_record);
6852
6853 int insert_block_group_record(struct block_group_tree *tree,
6854                               struct block_group_record *bg_rec)
6855 {
6856         int ret;
6857
6858         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6859         if (ret)
6860                 return ret;
6861
6862         list_add_tail(&bg_rec->list, &tree->block_groups);
6863         return 0;
6864 }
6865
6866 static void free_block_group_record(struct cache_extent *cache)
6867 {
6868         struct block_group_record *rec;
6869
6870         rec = container_of(cache, struct block_group_record, cache);
6871         list_del_init(&rec->list);
6872         free(rec);
6873 }
6874
6875 void free_block_group_tree(struct block_group_tree *tree)
6876 {
6877         cache_tree_free_extents(&tree->tree, free_block_group_record);
6878 }
6879
6880 int insert_device_extent_record(struct device_extent_tree *tree,
6881                                 struct device_extent_record *de_rec)
6882 {
6883         int ret;
6884
6885         /*
6886          * Device extent is a bit different from the other extents, because
6887          * the extents which belong to the different devices may have the
6888          * same start and size, so we need use the special extent cache
6889          * search/insert functions.
6890          */
6891         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6892         if (ret)
6893                 return ret;
6894
6895         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6896         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6897         return 0;
6898 }
6899
6900 static void free_device_extent_record(struct cache_extent *cache)
6901 {
6902         struct device_extent_record *rec;
6903
6904         rec = container_of(cache, struct device_extent_record, cache);
6905         if (!list_empty(&rec->chunk_list))
6906                 list_del_init(&rec->chunk_list);
6907         if (!list_empty(&rec->device_list))
6908                 list_del_init(&rec->device_list);
6909         free(rec);
6910 }
6911
6912 void free_device_extent_tree(struct device_extent_tree *tree)
6913 {
6914         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6915 }
6916
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the backref described by the v0 extent ref item at @slot of @leaf.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the ref count read
 * from the item.
 *
 * Returns the result of add_tree_backref() / add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
6937
6938 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6939                                             struct btrfs_key *key,
6940                                             int slot)
6941 {
6942         struct btrfs_chunk *ptr;
6943         struct chunk_record *rec;
6944         int num_stripes, i;
6945
6946         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6947         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6948
6949         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6950         if (!rec) {
6951                 fprintf(stderr, "memory allocation failed\n");
6952                 exit(-1);
6953         }
6954
6955         INIT_LIST_HEAD(&rec->list);
6956         INIT_LIST_HEAD(&rec->dextents);
6957         rec->bg_rec = NULL;
6958
6959         rec->cache.start = key->offset;
6960         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6961
6962         rec->generation = btrfs_header_generation(leaf);
6963
6964         rec->objectid = key->objectid;
6965         rec->type = key->type;
6966         rec->offset = key->offset;
6967
6968         rec->length = rec->cache.size;
6969         rec->owner = btrfs_chunk_owner(leaf, ptr);
6970         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6971         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6972         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6973         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6974         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6975         rec->num_stripes = num_stripes;
6976         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6977
6978         for (i = 0; i < rec->num_stripes; ++i) {
6979                 rec->stripes[i].devid =
6980                         btrfs_stripe_devid_nr(leaf, ptr, i);
6981                 rec->stripes[i].offset =
6982                         btrfs_stripe_offset_nr(leaf, ptr, i);
6983                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6984                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6985                                 BTRFS_UUID_SIZE);
6986         }
6987
6988         return rec;
6989 }
6990
6991 static int process_chunk_item(struct cache_tree *chunk_cache,
6992                               struct btrfs_key *key, struct extent_buffer *eb,
6993                               int slot)
6994 {
6995         struct chunk_record *rec;
6996         struct btrfs_chunk *chunk;
6997         int ret = 0;
6998
6999         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7000         /*
7001          * Do extra check for this chunk item,
7002          *
7003          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7004          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7005          * and owner<->key_type check.
7006          */
7007         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7008                                       key->offset);
7009         if (ret < 0) {
7010                 error("chunk(%llu, %llu) is not valid, ignore it",
7011                       key->offset, btrfs_chunk_length(eb, chunk));
7012                 return 0;
7013         }
7014         rec = btrfs_new_chunk_record(eb, key, slot);
7015         ret = insert_cache_extent(chunk_cache, &rec->cache);
7016         if (ret) {
7017                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7018                         rec->offset, rec->length);
7019                 free(rec);
7020         }
7021
7022         return ret;
7023 }
7024
7025 static int process_device_item(struct rb_root *dev_cache,
7026                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7027 {
7028         struct btrfs_dev_item *ptr;
7029         struct device_record *rec;
7030         int ret = 0;
7031
7032         ptr = btrfs_item_ptr(eb,
7033                 slot, struct btrfs_dev_item);
7034
7035         rec = malloc(sizeof(*rec));
7036         if (!rec) {
7037                 fprintf(stderr, "memory allocation failed\n");
7038                 return -ENOMEM;
7039         }
7040
7041         rec->devid = key->offset;
7042         rec->generation = btrfs_header_generation(eb);
7043
7044         rec->objectid = key->objectid;
7045         rec->type = key->type;
7046         rec->offset = key->offset;
7047
7048         rec->devid = btrfs_device_id(eb, ptr);
7049         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7050         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7051
7052         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7053         if (ret) {
7054                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7055                 free(rec);
7056         }
7057
7058         return ret;
7059 }
7060
7061 struct block_group_record *
7062 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7063                              int slot)
7064 {
7065         struct btrfs_block_group_item *ptr;
7066         struct block_group_record *rec;
7067
7068         rec = calloc(1, sizeof(*rec));
7069         if (!rec) {
7070                 fprintf(stderr, "memory allocation failed\n");
7071                 exit(-1);
7072         }
7073
7074         rec->cache.start = key->objectid;
7075         rec->cache.size = key->offset;
7076
7077         rec->generation = btrfs_header_generation(leaf);
7078
7079         rec->objectid = key->objectid;
7080         rec->type = key->type;
7081         rec->offset = key->offset;
7082
7083         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7084         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7085
7086         INIT_LIST_HEAD(&rec->list);
7087
7088         return rec;
7089 }
7090
7091 static int process_block_group_item(struct block_group_tree *block_group_cache,
7092                                     struct btrfs_key *key,
7093                                     struct extent_buffer *eb, int slot)
7094 {
7095         struct block_group_record *rec;
7096         int ret = 0;
7097
7098         rec = btrfs_new_block_group_record(eb, key, slot);
7099         ret = insert_block_group_record(block_group_cache, rec);
7100         if (ret) {
7101                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7102                         rec->objectid, rec->offset);
7103                 free(rec);
7104         }
7105
7106         return ret;
7107 }
7108
7109 struct device_extent_record *
7110 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7111                                struct btrfs_key *key, int slot)
7112 {
7113         struct device_extent_record *rec;
7114         struct btrfs_dev_extent *ptr;
7115
7116         rec = calloc(1, sizeof(*rec));
7117         if (!rec) {
7118                 fprintf(stderr, "memory allocation failed\n");
7119                 exit(-1);
7120         }
7121
7122         rec->cache.objectid = key->objectid;
7123         rec->cache.start = key->offset;
7124
7125         rec->generation = btrfs_header_generation(leaf);
7126
7127         rec->objectid = key->objectid;
7128         rec->type = key->type;
7129         rec->offset = key->offset;
7130
7131         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7132         rec->chunk_objecteid =
7133                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7134         rec->chunk_offset =
7135                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7136         rec->length = btrfs_dev_extent_length(leaf, ptr);
7137         rec->cache.size = rec->length;
7138
7139         INIT_LIST_HEAD(&rec->chunk_list);
7140         INIT_LIST_HEAD(&rec->device_list);
7141
7142         return rec;
7143 }
7144
7145 static int
7146 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7147                            struct btrfs_key *key, struct extent_buffer *eb,
7148                            int slot)
7149 {
7150         struct device_extent_record *rec;
7151         int ret;
7152
7153         rec = btrfs_new_device_extent_record(eb, key, slot);
7154         ret = insert_device_extent_record(dev_extent_cache, rec);
7155         if (ret) {
7156                 fprintf(stderr,
7157                         "Device extent[%llu, %llu, %llu] existed.\n",
7158                         rec->objectid, rec->offset, rec->length);
7159                 free(rec);
7160         }
7161
7162         return ret;
7163 }
7164
7165 static int process_extent_item(struct btrfs_root *root,
7166                                struct cache_tree *extent_cache,
7167                                struct extent_buffer *eb, int slot)
7168 {
7169         struct btrfs_extent_item *ei;
7170         struct btrfs_extent_inline_ref *iref;
7171         struct btrfs_extent_data_ref *dref;
7172         struct btrfs_shared_data_ref *sref;
7173         struct btrfs_key key;
7174         struct extent_record tmpl;
7175         unsigned long end;
7176         unsigned long ptr;
7177         int ret;
7178         int type;
7179         u32 item_size = btrfs_item_size_nr(eb, slot);
7180         u64 refs = 0;
7181         u64 offset;
7182         u64 num_bytes;
7183         int metadata = 0;
7184
7185         btrfs_item_key_to_cpu(eb, &key, slot);
7186
7187         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7188                 metadata = 1;
7189                 num_bytes = root->fs_info->nodesize;
7190         } else {
7191                 num_bytes = key.offset;
7192         }
7193
7194         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7195                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7196                       key.objectid, root->fs_info->sectorsize);
7197                 return -EIO;
7198         }
7199         if (item_size < sizeof(*ei)) {
7200 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7201                 struct btrfs_extent_item_v0 *ei0;
7202                 BUG_ON(item_size != sizeof(*ei0));
7203                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7204                 refs = btrfs_extent_refs_v0(eb, ei0);
7205 #else
7206                 BUG();
7207 #endif
7208                 memset(&tmpl, 0, sizeof(tmpl));
7209                 tmpl.start = key.objectid;
7210                 tmpl.nr = num_bytes;
7211                 tmpl.extent_item_refs = refs;
7212                 tmpl.metadata = metadata;
7213                 tmpl.found_rec = 1;
7214                 tmpl.max_size = num_bytes;
7215
7216                 return add_extent_rec(extent_cache, &tmpl);
7217         }
7218
7219         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7220         refs = btrfs_extent_refs(eb, ei);
7221         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7222                 metadata = 1;
7223         else
7224                 metadata = 0;
7225         if (metadata && num_bytes != root->fs_info->nodesize) {
7226                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7227                       num_bytes, root->fs_info->nodesize);
7228                 return -EIO;
7229         }
7230         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7231                 error("ignore invalid data extent, length %llu is not aligned to %u",
7232                       num_bytes, root->fs_info->sectorsize);
7233                 return -EIO;
7234         }
7235
7236         memset(&tmpl, 0, sizeof(tmpl));
7237         tmpl.start = key.objectid;
7238         tmpl.nr = num_bytes;
7239         tmpl.extent_item_refs = refs;
7240         tmpl.metadata = metadata;
7241         tmpl.found_rec = 1;
7242         tmpl.max_size = num_bytes;
7243         add_extent_rec(extent_cache, &tmpl);
7244
7245         ptr = (unsigned long)(ei + 1);
7246         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7247             key.type == BTRFS_EXTENT_ITEM_KEY)
7248                 ptr += sizeof(struct btrfs_tree_block_info);
7249
7250         end = (unsigned long)ei + item_size;
7251         while (ptr < end) {
7252                 iref = (struct btrfs_extent_inline_ref *)ptr;
7253                 type = btrfs_extent_inline_ref_type(eb, iref);
7254                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7255                 switch (type) {
7256                 case BTRFS_TREE_BLOCK_REF_KEY:
7257                         ret = add_tree_backref(extent_cache, key.objectid,
7258                                         0, offset, 0);
7259                         if (ret < 0)
7260                                 error(
7261                         "add_tree_backref failed (extent items tree block): %s",
7262                                       strerror(-ret));
7263                         break;
7264                 case BTRFS_SHARED_BLOCK_REF_KEY:
7265                         ret = add_tree_backref(extent_cache, key.objectid,
7266                                         offset, 0, 0);
7267                         if (ret < 0)
7268                                 error(
7269                         "add_tree_backref failed (extent items shared block): %s",
7270                                       strerror(-ret));
7271                         break;
7272                 case BTRFS_EXTENT_DATA_REF_KEY:
7273                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7274                         add_data_backref(extent_cache, key.objectid, 0,
7275                                         btrfs_extent_data_ref_root(eb, dref),
7276                                         btrfs_extent_data_ref_objectid(eb,
7277                                                                        dref),
7278                                         btrfs_extent_data_ref_offset(eb, dref),
7279                                         btrfs_extent_data_ref_count(eb, dref),
7280                                         0, num_bytes);
7281                         break;
7282                 case BTRFS_SHARED_DATA_REF_KEY:
7283                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7284                         add_data_backref(extent_cache, key.objectid, offset,
7285                                         0, 0, 0,
7286                                         btrfs_shared_data_ref_count(eb, sref),
7287                                         0, num_bytes);
7288                         break;
7289                 default:
7290                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7291                                 key.objectid, key.type, num_bytes);
7292                         goto out;
7293                 }
7294                 ptr += btrfs_extent_inline_ref_size(type);
7295         }
7296         WARN_ON(ptr > end);
7297 out:
7298         return 0;
7299 }
7300
7301 static int check_cache_range(struct btrfs_root *root,
7302                              struct btrfs_block_group_cache *cache,
7303                              u64 offset, u64 bytes)
7304 {
7305         struct btrfs_free_space *entry;
7306         u64 *logical;
7307         u64 bytenr;
7308         int stripe_len;
7309         int i, nr, ret;
7310
7311         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7312                 bytenr = btrfs_sb_offset(i);
7313                 ret = btrfs_rmap_block(root->fs_info,
7314                                        cache->key.objectid, bytenr, 0,
7315                                        &logical, &nr, &stripe_len);
7316                 if (ret)
7317                         return ret;
7318
7319                 while (nr--) {
7320                         if (logical[nr] + stripe_len <= offset)
7321                                 continue;
7322                         if (offset + bytes <= logical[nr])
7323                                 continue;
7324                         if (logical[nr] == offset) {
7325                                 if (stripe_len >= bytes) {
7326                                         free(logical);
7327                                         return 0;
7328                                 }
7329                                 bytes -= stripe_len;
7330                                 offset += stripe_len;
7331                         } else if (logical[nr] < offset) {
7332                                 if (logical[nr] + stripe_len >=
7333                                     offset + bytes) {
7334                                         free(logical);
7335                                         return 0;
7336                                 }
7337                                 bytes = (offset + bytes) -
7338                                         (logical[nr] + stripe_len);
7339                                 offset = logical[nr] + stripe_len;
7340                         } else {
7341                                 /*
7342                                  * Could be tricky, the super may land in the
7343                                  * middle of the area we're checking.  First
7344                                  * check the easiest case, it's at the end.
7345                                  */
7346                                 if (logical[nr] + stripe_len >=
7347                                     bytes + offset) {
7348                                         bytes = logical[nr] - offset;
7349                                         continue;
7350                                 }
7351
7352                                 /* Check the left side */
7353                                 ret = check_cache_range(root, cache,
7354                                                         offset,
7355                                                         logical[nr] - offset);
7356                                 if (ret) {
7357                                         free(logical);
7358                                         return ret;
7359                                 }
7360
7361                                 /* Now we continue with the right side */
7362                                 bytes = (offset + bytes) -
7363                                         (logical[nr] + stripe_len);
7364                                 offset = logical[nr] + stripe_len;
7365                         }
7366                 }
7367
7368                 free(logical);
7369         }
7370
7371         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7372         if (!entry) {
7373                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7374                         offset, offset+bytes);
7375                 return -EINVAL;
7376         }
7377
7378         if (entry->offset != offset) {
7379                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7380                         entry->offset);
7381                 return -EINVAL;
7382         }
7383
7384         if (entry->bytes != bytes) {
7385                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7386                         bytes, entry->bytes, offset);
7387                 return -EINVAL;
7388         }
7389
7390         unlink_free_space(cache->free_space_ctl, entry);
7391         free(entry);
7392         return 0;
7393 }
7394
7395 static int verify_space_cache(struct btrfs_root *root,
7396                               struct btrfs_block_group_cache *cache)
7397 {
7398         struct btrfs_path path;
7399         struct extent_buffer *leaf;
7400         struct btrfs_key key;
7401         u64 last;
7402         int ret = 0;
7403
7404         root = root->fs_info->extent_root;
7405
7406         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7407
7408         btrfs_init_path(&path);
7409         key.objectid = last;
7410         key.offset = 0;
7411         key.type = BTRFS_EXTENT_ITEM_KEY;
7412         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7413         if (ret < 0)
7414                 goto out;
7415         ret = 0;
7416         while (1) {
7417                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7418                         ret = btrfs_next_leaf(root, &path);
7419                         if (ret < 0)
7420                                 goto out;
7421                         if (ret > 0) {
7422                                 ret = 0;
7423                                 break;
7424                         }
7425                 }
7426                 leaf = path.nodes[0];
7427                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7428                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7429                         break;
7430                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7431                     key.type != BTRFS_METADATA_ITEM_KEY) {
7432                         path.slots[0]++;
7433                         continue;
7434                 }
7435
7436                 if (last == key.objectid) {
7437                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7438                                 last = key.objectid + key.offset;
7439                         else
7440                                 last = key.objectid + root->fs_info->nodesize;
7441                         path.slots[0]++;
7442                         continue;
7443                 }
7444
7445                 ret = check_cache_range(root, cache, last,
7446                                         key.objectid - last);
7447                 if (ret)
7448                         break;
7449                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7450                         last = key.objectid + key.offset;
7451                 else
7452                         last = key.objectid + root->fs_info->nodesize;
7453                 path.slots[0]++;
7454         }
7455
7456         if (last < cache->key.objectid + cache->key.offset)
7457                 ret = check_cache_range(root, cache, last,
7458                                         cache->key.objectid +
7459                                         cache->key.offset - last);
7460
7461 out:
7462         btrfs_release_path(&path);
7463
7464         if (!ret &&
7465             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7466                 fprintf(stderr, "There are still entries left in the space "
7467                         "cache\n");
7468                 ret = -EINVAL;
7469         }
7470
7471         return ret;
7472 }
7473
7474 static int check_space_cache(struct btrfs_root *root)
7475 {
7476         struct btrfs_block_group_cache *cache;
7477         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7478         int ret;
7479         int error = 0;
7480
7481         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7482             btrfs_super_generation(root->fs_info->super_copy) !=
7483             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7484                 printf("cache and super generation don't match, space cache "
7485                        "will be invalidated\n");
7486                 return 0;
7487         }
7488
7489         if (ctx.progress_enabled) {
7490                 ctx.tp = TASK_FREE_SPACE;
7491                 task_start(ctx.info);
7492         }
7493
7494         while (1) {
7495                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7496                 if (!cache)
7497                         break;
7498
7499                 start = cache->key.objectid + cache->key.offset;
7500                 if (!cache->free_space_ctl) {
7501                         if (btrfs_init_free_space_ctl(cache,
7502                                                 root->fs_info->sectorsize)) {
7503                                 ret = -ENOMEM;
7504                                 break;
7505                         }
7506                 } else {
7507                         btrfs_remove_free_space_cache(cache);
7508                 }
7509
7510                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7511                         ret = exclude_super_stripes(root, cache);
7512                         if (ret) {
7513                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7514                                         strerror(-ret));
7515                                 error++;
7516                                 continue;
7517                         }
7518                         ret = load_free_space_tree(root->fs_info, cache);
7519                         free_excluded_extents(root, cache);
7520                         if (ret < 0) {
7521                                 fprintf(stderr, "could not load free space tree: %s\n",
7522                                         strerror(-ret));
7523                                 error++;
7524                                 continue;
7525                         }
7526                         error += ret;
7527                 } else {
7528                         ret = load_free_space_cache(root->fs_info, cache);
7529                         if (!ret)
7530                                 continue;
7531                 }
7532
7533                 ret = verify_space_cache(root, cache);
7534                 if (ret) {
7535                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7536                                 cache->key.objectid);
7537                         error++;
7538                 }
7539         }
7540
7541         task_stop(ctx.info);
7542
7543         return error ? -EINVAL : 0;
7544 }
7545
7546 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7547                         u64 num_bytes, unsigned long leaf_offset,
7548                         struct extent_buffer *eb) {
7549
7550         struct btrfs_fs_info *fs_info = root->fs_info;
7551         u64 offset = 0;
7552         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7553         char *data;
7554         unsigned long csum_offset;
7555         u32 csum;
7556         u32 csum_expected;
7557         u64 read_len;
7558         u64 data_checked = 0;
7559         u64 tmp;
7560         int ret = 0;
7561         int mirror;
7562         int num_copies;
7563
7564         if (num_bytes % fs_info->sectorsize)
7565                 return -EINVAL;
7566
7567         data = malloc(num_bytes);
7568         if (!data)
7569                 return -ENOMEM;
7570
7571         while (offset < num_bytes) {
7572                 mirror = 0;
7573 again:
7574                 read_len = num_bytes - offset;
7575                 /* read as much space once a time */
7576                 ret = read_extent_data(fs_info, data + offset,
7577                                 bytenr + offset, &read_len, mirror);
7578                 if (ret)
7579                         goto out;
7580                 data_checked = 0;
7581                 /* verify every 4k data's checksum */
7582                 while (data_checked < read_len) {
7583                         csum = ~(u32)0;
7584                         tmp = offset + data_checked;
7585
7586                         csum = btrfs_csum_data((char *)data + tmp,
7587                                                csum, fs_info->sectorsize);
7588                         btrfs_csum_final(csum, (u8 *)&csum);
7589
7590                         csum_offset = leaf_offset +
7591                                  tmp / fs_info->sectorsize * csum_size;
7592                         read_extent_buffer(eb, (char *)&csum_expected,
7593                                            csum_offset, csum_size);
7594                         /* try another mirror */
7595                         if (csum != csum_expected) {
7596                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7597                                                 mirror, bytenr + tmp,
7598                                                 csum, csum_expected);
7599                                 num_copies = btrfs_num_copies(root->fs_info,
7600                                                 bytenr, num_bytes);
7601                                 if (mirror < num_copies - 1) {
7602                                         mirror += 1;
7603                                         goto again;
7604                                 }
7605                         }
7606                         data_checked += fs_info->sectorsize;
7607                 }
7608                 offset += read_len;
7609         }
7610 out:
7611         free(data);
7612         return ret;
7613 }
7614
7615 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7616                                u64 num_bytes)
7617 {
7618         struct btrfs_path path;
7619         struct extent_buffer *leaf;
7620         struct btrfs_key key;
7621         int ret;
7622
7623         btrfs_init_path(&path);
7624         key.objectid = bytenr;
7625         key.type = BTRFS_EXTENT_ITEM_KEY;
7626         key.offset = (u64)-1;
7627
7628 again:
7629         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7630                                 0, 0);
7631         if (ret < 0) {
7632                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7633                 btrfs_release_path(&path);
7634                 return ret;
7635         } else if (ret) {
7636                 if (path.slots[0] > 0) {
7637                         path.slots[0]--;
7638                 } else {
7639                         ret = btrfs_prev_leaf(root, &path);
7640                         if (ret < 0) {
7641                                 goto out;
7642                         } else if (ret > 0) {
7643                                 ret = 0;
7644                                 goto out;
7645                         }
7646                 }
7647         }
7648
7649         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7650
7651         /*
7652          * Block group items come before extent items if they have the same
7653          * bytenr, so walk back one more just in case.  Dear future traveller,
7654          * first congrats on mastering time travel.  Now if it's not too much
7655          * trouble could you go back to 2006 and tell Chris to make the
7656          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7657          * EXTENT_ITEM_KEY please?
7658          */
7659         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7660                 if (path.slots[0] > 0) {
7661                         path.slots[0]--;
7662                 } else {
7663                         ret = btrfs_prev_leaf(root, &path);
7664                         if (ret < 0) {
7665                                 goto out;
7666                         } else if (ret > 0) {
7667                                 ret = 0;
7668                                 goto out;
7669                         }
7670                 }
7671                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7672         }
7673
7674         while (num_bytes) {
7675                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7676                         ret = btrfs_next_leaf(root, &path);
7677                         if (ret < 0) {
7678                                 fprintf(stderr, "Error going to next leaf "
7679                                         "%d\n", ret);
7680                                 btrfs_release_path(&path);
7681                                 return ret;
7682                         } else if (ret) {
7683                                 break;
7684                         }
7685                 }
7686                 leaf = path.nodes[0];
7687                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7688                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7689                         path.slots[0]++;
7690                         continue;
7691                 }
7692                 if (key.objectid + key.offset < bytenr) {
7693                         path.slots[0]++;
7694                         continue;
7695                 }
7696                 if (key.objectid > bytenr + num_bytes)
7697                         break;
7698
7699                 if (key.objectid == bytenr) {
7700                         if (key.offset >= num_bytes) {
7701                                 num_bytes = 0;
7702                                 break;
7703                         }
7704                         num_bytes -= key.offset;
7705                         bytenr += key.offset;
7706                 } else if (key.objectid < bytenr) {
7707                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7708                                 num_bytes = 0;
7709                                 break;
7710                         }
7711                         num_bytes = (bytenr + num_bytes) -
7712                                 (key.objectid + key.offset);
7713                         bytenr = key.objectid + key.offset;
7714                 } else {
7715                         if (key.objectid + key.offset < bytenr + num_bytes) {
7716                                 u64 new_start = key.objectid + key.offset;
7717                                 u64 new_bytes = bytenr + num_bytes - new_start;
7718
7719                                 /*
7720                                  * Weird case, the extent is in the middle of
7721                                  * our range, we'll have to search one side
7722                                  * and then the other.  Not sure if this happens
7723                                  * in real life, but no harm in coding it up
7724                                  * anyway just in case.
7725                                  */
7726                                 btrfs_release_path(&path);
7727                                 ret = check_extent_exists(root, new_start,
7728                                                           new_bytes);
7729                                 if (ret) {
7730                                         fprintf(stderr, "Right section didn't "
7731                                                 "have a record\n");
7732                                         break;
7733                                 }
7734                                 num_bytes = key.objectid - bytenr;
7735                                 goto again;
7736                         }
7737                         num_bytes = key.objectid - bytenr;
7738                 }
7739                 path.slots[0]++;
7740         }
7741         ret = 0;
7742
7743 out:
7744         if (num_bytes && !ret) {
7745                 fprintf(stderr, "There are no extents for csum range "
7746                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7747                 ret = 1;
7748         }
7749
7750         btrfs_release_path(&path);
7751         return ret;
7752 }
7753
7754 static int check_csums(struct btrfs_root *root)
7755 {
7756         struct btrfs_path path;
7757         struct extent_buffer *leaf;
7758         struct btrfs_key key;
7759         u64 offset = 0, num_bytes = 0;
7760         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7761         int errors = 0;
7762         int ret;
7763         u64 data_len;
7764         unsigned long leaf_offset;
7765
7766         root = root->fs_info->csum_root;
7767         if (!extent_buffer_uptodate(root->node)) {
7768                 fprintf(stderr, "No valid csum tree found\n");
7769                 return -ENOENT;
7770         }
7771
7772         btrfs_init_path(&path);
7773         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7774         key.type = BTRFS_EXTENT_CSUM_KEY;
7775         key.offset = 0;
7776         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7777         if (ret < 0) {
7778                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7779                 btrfs_release_path(&path);
7780                 return ret;
7781         }
7782
7783         if (ret > 0 && path.slots[0])
7784                 path.slots[0]--;
7785         ret = 0;
7786
7787         while (1) {
7788                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7789                         ret = btrfs_next_leaf(root, &path);
7790                         if (ret < 0) {
7791                                 fprintf(stderr, "Error going to next leaf "
7792                                         "%d\n", ret);
7793                                 break;
7794                         }
7795                         if (ret)
7796                                 break;
7797                 }
7798                 leaf = path.nodes[0];
7799
7800                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7801                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7802                         path.slots[0]++;
7803                         continue;
7804                 }
7805
7806                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7807                               csum_size) * root->fs_info->sectorsize;
7808                 if (!check_data_csum)
7809                         goto skip_csum_check;
7810                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7811                 ret = check_extent_csums(root, key.offset, data_len,
7812                                          leaf_offset, leaf);
7813                 if (ret)
7814                         break;
7815 skip_csum_check:
7816                 if (!num_bytes) {
7817                         offset = key.offset;
7818                 } else if (key.offset != offset + num_bytes) {
7819                         ret = check_extent_exists(root, offset, num_bytes);
7820                         if (ret) {
7821                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7822                                         "there is no extent record\n",
7823                                         offset, offset+num_bytes);
7824                                 errors++;
7825                         }
7826                         offset = key.offset;
7827                         num_bytes = 0;
7828                 }
7829                 num_bytes += data_len;
7830                 path.slots[0]++;
7831         }
7832
7833         btrfs_release_path(&path);
7834         return errors;
7835 }
7836
7837 static int is_dropped_key(struct btrfs_key *key,
7838                           struct btrfs_key *drop_key) {
7839         if (key->objectid < drop_key->objectid)
7840                 return 1;
7841         else if (key->objectid == drop_key->objectid) {
7842                 if (key->type < drop_key->type)
7843                         return 1;
7844                 else if (key->type == drop_key->type) {
7845                         if (key->offset < drop_key->offset)
7846                                 return 1;
7847                 }
7848         }
7849         return 0;
7850 }
7851
7852 /*
7853  * Here are the rules for FULL_BACKREF.
7854  *
7855  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7856  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7857  *      FULL_BACKREF set.
7858  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7859  *    if it happened after the relocation occurred since we'll have dropped the
7860  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7861  *    have no real way to know for sure.
7862  *
7863  * We process the blocks one root at a time, and we start from the lowest root
7864  * objectid and go to the highest.  So we can just lookup the owner backref for
7865  * the record and if we don't find it then we know it doesn't exist and we have
7866  * a FULL BACKREF.
7867  *
7868  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7869  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7870  * be set or not and then we can check later once we've gathered all the refs.
7871  */
7872 static int calc_extent_flag(struct cache_tree *extent_cache,
7873                            struct extent_buffer *buf,
7874                            struct root_item_record *ri,
7875                            u64 *flags)
7876 {
7877         struct extent_record *rec;
7878         struct cache_extent *cache;
7879         struct tree_backref *tback;
7880         u64 owner = 0;
7881
7882         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7883         /* we have added this extent before */
7884         if (!cache)
7885                 return -ENOENT;
7886
7887         rec = container_of(cache, struct extent_record, cache);
7888
7889         /*
7890          * Except file/reloc tree, we can not have
7891          * FULL BACKREF MODE
7892          */
7893         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7894                 goto normal;
7895         /*
7896          * root node
7897          */
7898         if (buf->start == ri->bytenr)
7899                 goto normal;
7900
7901         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7902                 goto full_backref;
7903
7904         owner = btrfs_header_owner(buf);
7905         if (owner == ri->objectid)
7906                 goto normal;
7907
7908         tback = find_tree_backref(rec, 0, owner);
7909         if (!tback)
7910                 goto full_backref;
7911 normal:
7912         *flags = 0;
7913         if (rec->flag_block_full_backref != FLAG_UNSET &&
7914             rec->flag_block_full_backref != 0)
7915                 rec->bad_full_backref = 1;
7916         return 0;
7917 full_backref:
7918         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7919         if (rec->flag_block_full_backref != FLAG_UNSET &&
7920             rec->flag_block_full_backref != 1)
7921                 rec->bad_full_backref = 1;
7922         return 0;
7923 }
7924
7925 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7926 {
7927         fprintf(stderr, "Invalid key type(");
7928         print_key_type(stderr, 0, key_type);
7929         fprintf(stderr, ") found in root(");
7930         print_objectid(stderr, rootid, 0);
7931         fprintf(stderr, ")\n");
7932 }
7933
7934 /*
7935  * Check if the key is valid with its extent buffer.
7936  *
7937  * This is a early check in case invalid key exists in a extent buffer
7938  * This is not comprehensive yet, but should prevent wrong key/item passed
7939  * further
7940  */
7941 static int check_type_with_root(u64 rootid, u8 key_type)
7942 {
7943         switch (key_type) {
7944         /* Only valid in chunk tree */
7945         case BTRFS_DEV_ITEM_KEY:
7946         case BTRFS_CHUNK_ITEM_KEY:
7947                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7948                         goto err;
7949                 break;
7950         /* valid in csum and log tree */
7951         case BTRFS_CSUM_TREE_OBJECTID:
7952                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7953                       is_fstree(rootid)))
7954                         goto err;
7955                 break;
7956         case BTRFS_EXTENT_ITEM_KEY:
7957         case BTRFS_METADATA_ITEM_KEY:
7958         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7959                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7960                         goto err;
7961                 break;
7962         case BTRFS_ROOT_ITEM_KEY:
7963                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7964                         goto err;
7965                 break;
7966         case BTRFS_DEV_EXTENT_KEY:
7967                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7968                         goto err;
7969                 break;
7970         }
7971         return 0;
7972 err:
7973         report_mismatch_key_root(key_type, rootid);
7974         return -EINVAL;
7975 }
7976
7977 static int run_next_block(struct btrfs_root *root,
7978                           struct block_info *bits,
7979                           int bits_nr,
7980                           u64 *last,
7981                           struct cache_tree *pending,
7982                           struct cache_tree *seen,
7983                           struct cache_tree *reada,
7984                           struct cache_tree *nodes,
7985                           struct cache_tree *extent_cache,
7986                           struct cache_tree *chunk_cache,
7987                           struct rb_root *dev_cache,
7988                           struct block_group_tree *block_group_cache,
7989                           struct device_extent_tree *dev_extent_cache,
7990                           struct root_item_record *ri)
7991 {
7992         struct btrfs_fs_info *fs_info = root->fs_info;
7993         struct extent_buffer *buf;
7994         struct extent_record *rec = NULL;
7995         u64 bytenr;
7996         u32 size;
7997         u64 parent;
7998         u64 owner;
7999         u64 flags;
8000         u64 ptr;
8001         u64 gen = 0;
8002         int ret = 0;
8003         int i;
8004         int nritems;
8005         struct btrfs_key key;
8006         struct cache_extent *cache;
8007         int reada_bits;
8008
8009         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8010                                     bits_nr, &reada_bits);
8011         if (nritems == 0)
8012                 return 1;
8013
8014         if (!reada_bits) {
8015                 for(i = 0; i < nritems; i++) {
8016                         ret = add_cache_extent(reada, bits[i].start,
8017                                                bits[i].size);
8018                         if (ret == -EEXIST)
8019                                 continue;
8020
8021                         /* fixme, get the parent transid */
8022                         readahead_tree_block(fs_info, bits[i].start, 0);
8023                 }
8024         }
8025         *last = bits[0].start;
8026         bytenr = bits[0].start;
8027         size = bits[0].size;
8028
8029         cache = lookup_cache_extent(pending, bytenr, size);
8030         if (cache) {
8031                 remove_cache_extent(pending, cache);
8032                 free(cache);
8033         }
8034         cache = lookup_cache_extent(reada, bytenr, size);
8035         if (cache) {
8036                 remove_cache_extent(reada, cache);
8037                 free(cache);
8038         }
8039         cache = lookup_cache_extent(nodes, bytenr, size);
8040         if (cache) {
8041                 remove_cache_extent(nodes, cache);
8042                 free(cache);
8043         }
8044         cache = lookup_cache_extent(extent_cache, bytenr, size);
8045         if (cache) {
8046                 rec = container_of(cache, struct extent_record, cache);
8047                 gen = rec->parent_generation;
8048         }
8049
8050         /* fixme, get the real parent transid */
8051         buf = read_tree_block(root->fs_info, bytenr, gen);
8052         if (!extent_buffer_uptodate(buf)) {
8053                 record_bad_block_io(root->fs_info,
8054                                     extent_cache, bytenr, size);
8055                 goto out;
8056         }
8057
8058         nritems = btrfs_header_nritems(buf);
8059
8060         flags = 0;
8061         if (!init_extent_tree) {
8062                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8063                                        btrfs_header_level(buf), 1, NULL,
8064                                        &flags);
8065                 if (ret < 0) {
8066                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8067                         if (ret < 0) {
8068                                 fprintf(stderr, "Couldn't calc extent flags\n");
8069                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8070                         }
8071                 }
8072         } else {
8073                 flags = 0;
8074                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8075                 if (ret < 0) {
8076                         fprintf(stderr, "Couldn't calc extent flags\n");
8077                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8078                 }
8079         }
8080
8081         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8082                 if (ri != NULL &&
8083                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8084                     ri->objectid == btrfs_header_owner(buf)) {
8085                         /*
8086                          * Ok we got to this block from it's original owner and
8087                          * we have FULL_BACKREF set.  Relocation can leave
8088                          * converted blocks over so this is altogether possible,
8089                          * however it's not possible if the generation > the
8090                          * last snapshot, so check for this case.
8091                          */
8092                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8093                             btrfs_header_generation(buf) > ri->last_snapshot) {
8094                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8095                                 rec->bad_full_backref = 1;
8096                         }
8097                 }
8098         } else {
8099                 if (ri != NULL &&
8100                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8101                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8102                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8103                         rec->bad_full_backref = 1;
8104                 }
8105         }
8106
8107         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8108                 rec->flag_block_full_backref = 1;
8109                 parent = bytenr;
8110                 owner = 0;
8111         } else {
8112                 rec->flag_block_full_backref = 0;
8113                 parent = 0;
8114                 owner = btrfs_header_owner(buf);
8115         }
8116
8117         ret = check_block(root, extent_cache, buf, flags);
8118         if (ret)
8119                 goto out;
8120
8121         if (btrfs_is_leaf(buf)) {
8122                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8123                 for (i = 0; i < nritems; i++) {
8124                         struct btrfs_file_extent_item *fi;
8125                         btrfs_item_key_to_cpu(buf, &key, i);
8126                         /*
8127                          * Check key type against the leaf owner.
8128                          * Could filter quite a lot of early error if
8129                          * owner is correct
8130                          */
8131                         if (check_type_with_root(btrfs_header_owner(buf),
8132                                                  key.type)) {
8133                                 fprintf(stderr, "ignoring invalid key\n");
8134                                 continue;
8135                         }
8136                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8137                                 process_extent_item(root, extent_cache, buf,
8138                                                     i);
8139                                 continue;
8140                         }
8141                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8142                                 process_extent_item(root, extent_cache, buf,
8143                                                     i);
8144                                 continue;
8145                         }
8146                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8147                                 total_csum_bytes +=
8148                                         btrfs_item_size_nr(buf, i);
8149                                 continue;
8150                         }
8151                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8152                                 process_chunk_item(chunk_cache, &key, buf, i);
8153                                 continue;
8154                         }
8155                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8156                                 process_device_item(dev_cache, &key, buf, i);
8157                                 continue;
8158                         }
8159                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8160                                 process_block_group_item(block_group_cache,
8161                                         &key, buf, i);
8162                                 continue;
8163                         }
8164                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8165                                 process_device_extent_item(dev_extent_cache,
8166                                         &key, buf, i);
8167                                 continue;
8168
8169                         }
8170                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8171 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8172                                 process_extent_ref_v0(extent_cache, buf, i);
8173 #else
8174                                 BUG();
8175 #endif
8176                                 continue;
8177                         }
8178
8179                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8180                                 ret = add_tree_backref(extent_cache,
8181                                                 key.objectid, 0, key.offset, 0);
8182                                 if (ret < 0)
8183                                         error(
8184                                 "add_tree_backref failed (leaf tree block): %s",
8185                                               strerror(-ret));
8186                                 continue;
8187                         }
8188                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8189                                 ret = add_tree_backref(extent_cache,
8190                                                 key.objectid, key.offset, 0, 0);
8191                                 if (ret < 0)
8192                                         error(
8193                                 "add_tree_backref failed (leaf shared block): %s",
8194                                               strerror(-ret));
8195                                 continue;
8196                         }
8197                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8198                                 struct btrfs_extent_data_ref *ref;
8199                                 ref = btrfs_item_ptr(buf, i,
8200                                                 struct btrfs_extent_data_ref);
8201                                 add_data_backref(extent_cache,
8202                                         key.objectid, 0,
8203                                         btrfs_extent_data_ref_root(buf, ref),
8204                                         btrfs_extent_data_ref_objectid(buf,
8205                                                                        ref),
8206                                         btrfs_extent_data_ref_offset(buf, ref),
8207                                         btrfs_extent_data_ref_count(buf, ref),
8208                                         0, root->fs_info->sectorsize);
8209                                 continue;
8210                         }
8211                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8212                                 struct btrfs_shared_data_ref *ref;
8213                                 ref = btrfs_item_ptr(buf, i,
8214                                                 struct btrfs_shared_data_ref);
8215                                 add_data_backref(extent_cache,
8216                                         key.objectid, key.offset, 0, 0, 0,
8217                                         btrfs_shared_data_ref_count(buf, ref),
8218                                         0, root->fs_info->sectorsize);
8219                                 continue;
8220                         }
8221                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8222                                 struct bad_item *bad;
8223
8224                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8225                                         continue;
8226                                 if (!owner)
8227                                         continue;
8228                                 bad = malloc(sizeof(struct bad_item));
8229                                 if (!bad)
8230                                         continue;
8231                                 INIT_LIST_HEAD(&bad->list);
8232                                 memcpy(&bad->key, &key,
8233                                        sizeof(struct btrfs_key));
8234                                 bad->root_id = owner;
8235                                 list_add_tail(&bad->list, &delete_items);
8236                                 continue;
8237                         }
8238                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8239                                 continue;
8240                         fi = btrfs_item_ptr(buf, i,
8241                                             struct btrfs_file_extent_item);
8242                         if (btrfs_file_extent_type(buf, fi) ==
8243                             BTRFS_FILE_EXTENT_INLINE)
8244                                 continue;
8245                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8246                                 continue;
8247
8248                         data_bytes_allocated +=
8249                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8250                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8251                                 abort();
8252                         }
8253                         data_bytes_referenced +=
8254                                 btrfs_file_extent_num_bytes(buf, fi);
8255                         add_data_backref(extent_cache,
8256                                 btrfs_file_extent_disk_bytenr(buf, fi),
8257                                 parent, owner, key.objectid, key.offset -
8258                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8259                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8260                 }
8261         } else {
8262                 int level;
8263                 struct btrfs_key first_key;
8264
8265                 first_key.objectid = 0;
8266
8267                 if (nritems > 0)
8268                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8269                 level = btrfs_header_level(buf);
8270                 for (i = 0; i < nritems; i++) {
8271                         struct extent_record tmpl;
8272
8273                         ptr = btrfs_node_blockptr(buf, i);
8274                         size = root->fs_info->nodesize;
8275                         btrfs_node_key_to_cpu(buf, &key, i);
8276                         if (ri != NULL) {
8277                                 if ((level == ri->drop_level)
8278                                     && is_dropped_key(&key, &ri->drop_key)) {
8279                                         continue;
8280                                 }
8281                         }
8282
8283                         memset(&tmpl, 0, sizeof(tmpl));
8284                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8285                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8286                         tmpl.start = ptr;
8287                         tmpl.nr = size;
8288                         tmpl.refs = 1;
8289                         tmpl.metadata = 1;
8290                         tmpl.max_size = size;
8291                         ret = add_extent_rec(extent_cache, &tmpl);
8292                         if (ret < 0)
8293                                 goto out;
8294
8295                         ret = add_tree_backref(extent_cache, ptr, parent,
8296                                         owner, 1);
8297                         if (ret < 0) {
8298                                 error(
8299                                 "add_tree_backref failed (non-leaf block): %s",
8300                                       strerror(-ret));
8301                                 continue;
8302                         }
8303
8304                         if (level > 1) {
8305                                 add_pending(nodes, seen, ptr, size);
8306                         } else {
8307                                 add_pending(pending, seen, ptr, size);
8308                         }
8309                 }
8310                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8311                                       nritems) * sizeof(struct btrfs_key_ptr);
8312         }
8313         total_btree_bytes += buf->len;
8314         if (fs_root_objectid(btrfs_header_owner(buf)))
8315                 total_fs_tree_bytes += buf->len;
8316         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8317                 total_extent_tree_bytes += buf->len;
8318 out:
8319         free_extent_buffer(buf);
8320         return ret;
8321 }
8322
8323 static int add_root_to_pending(struct extent_buffer *buf,
8324                                struct cache_tree *extent_cache,
8325                                struct cache_tree *pending,
8326                                struct cache_tree *seen,
8327                                struct cache_tree *nodes,
8328                                u64 objectid)
8329 {
8330         struct extent_record tmpl;
8331         int ret;
8332
8333         if (btrfs_header_level(buf) > 0)
8334                 add_pending(nodes, seen, buf->start, buf->len);
8335         else
8336                 add_pending(pending, seen, buf->start, buf->len);
8337
8338         memset(&tmpl, 0, sizeof(tmpl));
8339         tmpl.start = buf->start;
8340         tmpl.nr = buf->len;
8341         tmpl.is_root = 1;
8342         tmpl.refs = 1;
8343         tmpl.metadata = 1;
8344         tmpl.max_size = buf->len;
8345         add_extent_rec(extent_cache, &tmpl);
8346
8347         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8348             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8349                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8350                                 0, 1);
8351         else
8352                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8353                                 1);
8354         return ret;
8355 }
8356
8357 /* as we fix the tree, we might be deleting blocks that
8358  * we're tracking for repair.  This hook makes sure we
8359  * remove any backrefs for blocks as we are fixing them.
8360  */
8361 static int free_extent_hook(struct btrfs_trans_handle *trans,
8362                             struct btrfs_root *root,
8363                             u64 bytenr, u64 num_bytes, u64 parent,
8364                             u64 root_objectid, u64 owner, u64 offset,
8365                             int refs_to_drop)
8366 {
8367         struct extent_record *rec;
8368         struct cache_extent *cache;
8369         int is_data;
8370         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8371
8372         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8373         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8374         if (!cache)
8375                 return 0;
8376
8377         rec = container_of(cache, struct extent_record, cache);
8378         if (is_data) {
8379                 struct data_backref *back;
8380                 back = find_data_backref(rec, parent, root_objectid, owner,
8381                                          offset, 1, bytenr, num_bytes);
8382                 if (!back)
8383                         goto out;
8384                 if (back->node.found_ref) {
8385                         back->found_ref -= refs_to_drop;
8386                         if (rec->refs)
8387                                 rec->refs -= refs_to_drop;
8388                 }
8389                 if (back->node.found_extent_tree) {
8390                         back->num_refs -= refs_to_drop;
8391                         if (rec->extent_item_refs)
8392                                 rec->extent_item_refs -= refs_to_drop;
8393                 }
8394                 if (back->found_ref == 0)
8395                         back->node.found_ref = 0;
8396                 if (back->num_refs == 0)
8397                         back->node.found_extent_tree = 0;
8398
8399                 if (!back->node.found_extent_tree && back->node.found_ref) {
8400                         rb_erase(&back->node.node, &rec->backref_tree);
8401                         free(back);
8402                 }
8403         } else {
8404                 struct tree_backref *back;
8405                 back = find_tree_backref(rec, parent, root_objectid);
8406                 if (!back)
8407                         goto out;
8408                 if (back->node.found_ref) {
8409                         if (rec->refs)
8410                                 rec->refs--;
8411                         back->node.found_ref = 0;
8412                 }
8413                 if (back->node.found_extent_tree) {
8414                         if (rec->extent_item_refs)
8415                                 rec->extent_item_refs--;
8416                         back->node.found_extent_tree = 0;
8417                 }
8418                 if (!back->node.found_extent_tree && back->node.found_ref) {
8419                         rb_erase(&back->node.node, &rec->backref_tree);
8420                         free(back);
8421                 }
8422         }
8423         maybe_free_extent_rec(extent_cache, rec);
8424 out:
8425         return 0;
8426 }
8427
8428 static int delete_extent_records(struct btrfs_trans_handle *trans,
8429                                  struct btrfs_root *root,
8430                                  struct btrfs_path *path,
8431                                  u64 bytenr)
8432 {
8433         struct btrfs_key key;
8434         struct btrfs_key found_key;
8435         struct extent_buffer *leaf;
8436         int ret;
8437         int slot;
8438
8439
8440         key.objectid = bytenr;
8441         key.type = (u8)-1;
8442         key.offset = (u64)-1;
8443
8444         while(1) {
8445                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8446                                         &key, path, 0, 1);
8447                 if (ret < 0)
8448                         break;
8449
8450                 if (ret > 0) {
8451                         ret = 0;
8452                         if (path->slots[0] == 0)
8453                                 break;
8454                         path->slots[0]--;
8455                 }
8456                 ret = 0;
8457
8458                 leaf = path->nodes[0];
8459                 slot = path->slots[0];
8460
8461                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8462                 if (found_key.objectid != bytenr)
8463                         break;
8464
8465                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8466                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8467                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8468                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8469                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8470                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8471                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8472                         btrfs_release_path(path);
8473                         if (found_key.type == 0) {
8474                                 if (found_key.offset == 0)
8475                                         break;
8476                                 key.offset = found_key.offset - 1;
8477                                 key.type = found_key.type;
8478                         }
8479                         key.type = found_key.type - 1;
8480                         key.offset = (u64)-1;
8481                         continue;
8482                 }
8483
8484                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8485                         found_key.objectid, found_key.type, found_key.offset);
8486
8487                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8488                 if (ret)
8489                         break;
8490                 btrfs_release_path(path);
8491
8492                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8493                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8494                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8495                                 found_key.offset : root->fs_info->nodesize;
8496
8497                         ret = btrfs_update_block_group(trans, root, bytenr,
8498                                                        bytes, 0, 0);
8499                         if (ret)
8500                                 break;
8501                 }
8502         }
8503
8504         btrfs_release_path(path);
8505         return ret;
8506 }
8507
8508 /*
8509  * for a single backref, this will allocate a new extent
8510  * and add the backref to it.
8511  */
8512 static int record_extent(struct btrfs_trans_handle *trans,
8513                          struct btrfs_fs_info *info,
8514                          struct btrfs_path *path,
8515                          struct extent_record *rec,
8516                          struct extent_backref *back,
8517                          int allocated, u64 flags)
8518 {
8519         int ret = 0;
8520         struct btrfs_root *extent_root = info->extent_root;
8521         struct extent_buffer *leaf;
8522         struct btrfs_key ins_key;
8523         struct btrfs_extent_item *ei;
8524         struct data_backref *dback;
8525         struct btrfs_tree_block_info *bi;
8526
8527         if (!back->is_data)
8528                 rec->max_size = max_t(u64, rec->max_size,
8529                                     info->nodesize);
8530
8531         if (!allocated) {
8532                 u32 item_size = sizeof(*ei);
8533
8534                 if (!back->is_data)
8535                         item_size += sizeof(*bi);
8536
8537                 ins_key.objectid = rec->start;
8538                 ins_key.offset = rec->max_size;
8539                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8540
8541                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8542                                         &ins_key, item_size);
8543                 if (ret)
8544                         goto fail;
8545
8546                 leaf = path->nodes[0];
8547                 ei = btrfs_item_ptr(leaf, path->slots[0],
8548                                     struct btrfs_extent_item);
8549
8550                 btrfs_set_extent_refs(leaf, ei, 0);
8551                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8552
8553                 if (back->is_data) {
8554                         btrfs_set_extent_flags(leaf, ei,
8555                                                BTRFS_EXTENT_FLAG_DATA);
8556                 } else {
8557                         struct btrfs_disk_key copy_key;;
8558
8559                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8560                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8561                                              sizeof(*bi));
8562
8563                         btrfs_set_disk_key_objectid(&copy_key,
8564                                                     rec->info_objectid);
8565                         btrfs_set_disk_key_type(&copy_key, 0);
8566                         btrfs_set_disk_key_offset(&copy_key, 0);
8567
8568                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8569                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8570
8571                         btrfs_set_extent_flags(leaf, ei,
8572                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8573                 }
8574
8575                 btrfs_mark_buffer_dirty(leaf);
8576                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8577                                                rec->max_size, 1, 0);
8578                 if (ret)
8579                         goto fail;
8580                 btrfs_release_path(path);
8581         }
8582
8583         if (back->is_data) {
8584                 u64 parent;
8585                 int i;
8586
8587                 dback = to_data_backref(back);
8588                 if (back->full_backref)
8589                         parent = dback->parent;
8590                 else
8591                         parent = 0;
8592
8593                 for (i = 0; i < dback->found_ref; i++) {
8594                         /* if parent != 0, we're doing a full backref
8595                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8596                          * just makes the backref allocator create a data
8597                          * backref
8598                          */
8599                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8600                                                    rec->start, rec->max_size,
8601                                                    parent,
8602                                                    dback->root,
8603                                                    parent ?
8604                                                    BTRFS_FIRST_FREE_OBJECTID :
8605                                                    dback->owner,
8606                                                    dback->offset);
8607                         if (ret)
8608                                 break;
8609                 }
8610                 fprintf(stderr, "adding new data backref"
8611                                 " on %llu %s %llu owner %llu"
8612                                 " offset %llu found %d\n",
8613                                 (unsigned long long)rec->start,
8614                                 back->full_backref ?
8615                                 "parent" : "root",
8616                                 back->full_backref ?
8617                                 (unsigned long long)parent :
8618                                 (unsigned long long)dback->root,
8619                                 (unsigned long long)dback->owner,
8620                                 (unsigned long long)dback->offset,
8621                                 dback->found_ref);
8622         } else {
8623                 u64 parent;
8624                 struct tree_backref *tback;
8625
8626                 tback = to_tree_backref(back);
8627                 if (back->full_backref)
8628                         parent = tback->parent;
8629                 else
8630                         parent = 0;
8631
8632                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8633                                            rec->start, rec->max_size,
8634                                            parent, tback->root, 0, 0);
8635                 fprintf(stderr, "adding new tree backref on "
8636                         "start %llu len %llu parent %llu root %llu\n",
8637                         rec->start, rec->max_size, parent, tback->root);
8638         }
8639 fail:
8640         btrfs_release_path(path);
8641         return ret;
8642 }
8643
8644 static struct extent_entry *find_entry(struct list_head *entries,
8645                                        u64 bytenr, u64 bytes)
8646 {
8647         struct extent_entry *entry = NULL;
8648
8649         list_for_each_entry(entry, entries, list) {
8650                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8651                         return entry;
8652         }
8653
8654         return NULL;
8655 }
8656
8657 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8658 {
8659         struct extent_entry *entry, *best = NULL, *prev = NULL;
8660
8661         list_for_each_entry(entry, entries, list) {
8662                 /*
8663                  * If there are as many broken entries as entries then we know
8664                  * not to trust this particular entry.
8665                  */
8666                 if (entry->broken == entry->count)
8667                         continue;
8668
8669                 /*
8670                  * Special case, when there are only two entries and 'best' is
8671                  * the first one
8672                  */
8673                 if (!prev) {
8674                         best = entry;
8675                         prev = entry;
8676                         continue;
8677                 }
8678
8679                 /*
8680                  * If our current entry == best then we can't be sure our best
8681                  * is really the best, so we need to keep searching.
8682                  */
8683                 if (best && best->count == entry->count) {
8684                         prev = entry;
8685                         best = NULL;
8686                         continue;
8687                 }
8688
8689                 /* Prev == entry, not good enough, have to keep searching */
8690                 if (!prev->broken && prev->count == entry->count)
8691                         continue;
8692
8693                 if (!best)
8694                         best = (prev->count > entry->count) ? prev : entry;
8695                 else if (best->count < entry->count)
8696                         best = entry;
8697                 prev = entry;
8698         }
8699
8700         return best;
8701 }
8702
8703 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8704                       struct data_backref *dback, struct extent_entry *entry)
8705 {
8706         struct btrfs_trans_handle *trans;
8707         struct btrfs_root *root;
8708         struct btrfs_file_extent_item *fi;
8709         struct extent_buffer *leaf;
8710         struct btrfs_key key;
8711         u64 bytenr, bytes;
8712         int ret, err;
8713
8714         key.objectid = dback->root;
8715         key.type = BTRFS_ROOT_ITEM_KEY;
8716         key.offset = (u64)-1;
8717         root = btrfs_read_fs_root(info, &key);
8718         if (IS_ERR(root)) {
8719                 fprintf(stderr, "Couldn't find root for our ref\n");
8720                 return -EINVAL;
8721         }
8722
8723         /*
8724          * The backref points to the original offset of the extent if it was
8725          * split, so we need to search down to the offset we have and then walk
8726          * forward until we find the backref we're looking for.
8727          */
8728         key.objectid = dback->owner;
8729         key.type = BTRFS_EXTENT_DATA_KEY;
8730         key.offset = dback->offset;
8731         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8732         if (ret < 0) {
8733                 fprintf(stderr, "Error looking up ref %d\n", ret);
8734                 return ret;
8735         }
8736
8737         while (1) {
8738                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8739                         ret = btrfs_next_leaf(root, path);
8740                         if (ret) {
8741                                 fprintf(stderr, "Couldn't find our ref, next\n");
8742                                 return -EINVAL;
8743                         }
8744                 }
8745                 leaf = path->nodes[0];
8746                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8747                 if (key.objectid != dback->owner ||
8748                     key.type != BTRFS_EXTENT_DATA_KEY) {
8749                         fprintf(stderr, "Couldn't find our ref, search\n");
8750                         return -EINVAL;
8751                 }
8752                 fi = btrfs_item_ptr(leaf, path->slots[0],
8753                                     struct btrfs_file_extent_item);
8754                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8755                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8756
8757                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8758                         break;
8759                 path->slots[0]++;
8760         }
8761
8762         btrfs_release_path(path);
8763
8764         trans = btrfs_start_transaction(root, 1);
8765         if (IS_ERR(trans))
8766                 return PTR_ERR(trans);
8767
8768         /*
8769          * Ok we have the key of the file extent we want to fix, now we can cow
8770          * down to the thing and fix it.
8771          */
8772         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8773         if (ret < 0) {
8774                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8775                         key.objectid, key.type, key.offset, ret);
8776                 goto out;
8777         }
8778         if (ret > 0) {
8779                 fprintf(stderr, "Well that's odd, we just found this key "
8780                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8781                         key.offset);
8782                 ret = -EINVAL;
8783                 goto out;
8784         }
8785         leaf = path->nodes[0];
8786         fi = btrfs_item_ptr(leaf, path->slots[0],
8787                             struct btrfs_file_extent_item);
8788
8789         if (btrfs_file_extent_compression(leaf, fi) &&
8790             dback->disk_bytenr != entry->bytenr) {
8791                 fprintf(stderr, "Ref doesn't match the record start and is "
8792                         "compressed, please take a btrfs-image of this file "
8793                         "system and send it to a btrfs developer so they can "
8794                         "complete this functionality for bytenr %Lu\n",
8795                         dback->disk_bytenr);
8796                 ret = -EINVAL;
8797                 goto out;
8798         }
8799
8800         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8801                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8802         } else if (dback->disk_bytenr > entry->bytenr) {
8803                 u64 off_diff, offset;
8804
8805                 off_diff = dback->disk_bytenr - entry->bytenr;
8806                 offset = btrfs_file_extent_offset(leaf, fi);
8807                 if (dback->disk_bytenr + offset +
8808                     btrfs_file_extent_num_bytes(leaf, fi) >
8809                     entry->bytenr + entry->bytes) {
8810                         fprintf(stderr, "Ref is past the entry end, please "
8811                                 "take a btrfs-image of this file system and "
8812                                 "send it to a btrfs developer, ref %Lu\n",
8813                                 dback->disk_bytenr);
8814                         ret = -EINVAL;
8815                         goto out;
8816                 }
8817                 offset += off_diff;
8818                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8819                 btrfs_set_file_extent_offset(leaf, fi, offset);
8820         } else if (dback->disk_bytenr < entry->bytenr) {
8821                 u64 offset;
8822
8823                 offset = btrfs_file_extent_offset(leaf, fi);
8824                 if (dback->disk_bytenr + offset < entry->bytenr) {
8825                         fprintf(stderr, "Ref is before the entry start, please"
8826                                 " take a btrfs-image of this file system and "
8827                                 "send it to a btrfs developer, ref %Lu\n",
8828                                 dback->disk_bytenr);
8829                         ret = -EINVAL;
8830                         goto out;
8831                 }
8832
8833                 offset += dback->disk_bytenr;
8834                 offset -= entry->bytenr;
8835                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8836                 btrfs_set_file_extent_offset(leaf, fi, offset);
8837         }
8838
8839         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8840
8841         /*
8842          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8843          * only do this if we aren't using compression, otherwise it's a
8844          * trickier case.
8845          */
8846         if (!btrfs_file_extent_compression(leaf, fi))
8847                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8848         else
8849                 printf("ram bytes may be wrong?\n");
8850         btrfs_mark_buffer_dirty(leaf);
8851 out:
8852         err = btrfs_commit_transaction(trans, root);
8853         btrfs_release_path(path);
8854         return ret ? ret : err;
8855 }
8856
8857 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8858                            struct extent_record *rec)
8859 {
8860         struct extent_backref *back, *tmp;
8861         struct data_backref *dback;
8862         struct extent_entry *entry, *best = NULL;
8863         LIST_HEAD(entries);
8864         int nr_entries = 0;
8865         int broken_entries = 0;
8866         int ret = 0;
8867         short mismatch = 0;
8868
8869         /*
8870          * Metadata is easy and the backrefs should always agree on bytenr and
8871          * size, if not we've got bigger issues.
8872          */
8873         if (rec->metadata)
8874                 return 0;
8875
8876         rbtree_postorder_for_each_entry_safe(back, tmp,
8877                                              &rec->backref_tree, node) {
8878                 if (back->full_backref || !back->is_data)
8879                         continue;
8880
8881                 dback = to_data_backref(back);
8882
8883                 /*
8884                  * We only pay attention to backrefs that we found a real
8885                  * backref for.
8886                  */
8887                 if (dback->found_ref == 0)
8888                         continue;
8889
8890                 /*
8891                  * For now we only catch when the bytes don't match, not the
8892                  * bytenr.  We can easily do this at the same time, but I want
8893                  * to have a fs image to test on before we just add repair
8894                  * functionality willy-nilly so we know we won't screw up the
8895                  * repair.
8896                  */
8897
8898                 entry = find_entry(&entries, dback->disk_bytenr,
8899                                    dback->bytes);
8900                 if (!entry) {
8901                         entry = malloc(sizeof(struct extent_entry));
8902                         if (!entry) {
8903                                 ret = -ENOMEM;
8904                                 goto out;
8905                         }
8906                         memset(entry, 0, sizeof(*entry));
8907                         entry->bytenr = dback->disk_bytenr;
8908                         entry->bytes = dback->bytes;
8909                         list_add_tail(&entry->list, &entries);
8910                         nr_entries++;
8911                 }
8912
8913                 /*
8914                  * If we only have on entry we may think the entries agree when
8915                  * in reality they don't so we have to do some extra checking.
8916                  */
8917                 if (dback->disk_bytenr != rec->start ||
8918                     dback->bytes != rec->nr || back->broken)
8919                         mismatch = 1;
8920
8921                 if (back->broken) {
8922                         entry->broken++;
8923                         broken_entries++;
8924                 }
8925
8926                 entry->count++;
8927         }
8928
8929         /* Yay all the backrefs agree, carry on good sir */
8930         if (nr_entries <= 1 && !mismatch)
8931                 goto out;
8932
8933         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8934                 "%Lu\n", rec->start);
8935
8936         /*
8937          * First we want to see if the backrefs can agree amongst themselves who
8938          * is right, so figure out which one of the entries has the highest
8939          * count.
8940          */
8941         best = find_most_right_entry(&entries);
8942
8943         /*
8944          * Ok so we may have an even split between what the backrefs think, so
8945          * this is where we use the extent ref to see what it thinks.
8946          */
8947         if (!best) {
8948                 entry = find_entry(&entries, rec->start, rec->nr);
8949                 if (!entry && (!broken_entries || !rec->found_rec)) {
8950                         fprintf(stderr, "Backrefs don't agree with each other "
8951                                 "and extent record doesn't agree with anybody,"
8952                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8953                                 rec->start, rec->nr);
8954                         ret = -EINVAL;
8955                         goto out;
8956                 } else if (!entry) {
8957                         /*
8958                          * Ok our backrefs were broken, we'll assume this is the
8959                          * correct value and add an entry for this range.
8960                          */
8961                         entry = malloc(sizeof(struct extent_entry));
8962                         if (!entry) {
8963                                 ret = -ENOMEM;
8964                                 goto out;
8965                         }
8966                         memset(entry, 0, sizeof(*entry));
8967                         entry->bytenr = rec->start;
8968                         entry->bytes = rec->nr;
8969                         list_add_tail(&entry->list, &entries);
8970                         nr_entries++;
8971                 }
8972                 entry->count++;
8973                 best = find_most_right_entry(&entries);
8974                 if (!best) {
8975                         fprintf(stderr, "Backrefs and extent record evenly "
8976                                 "split on who is right, this is going to "
8977                                 "require user input to fix bytenr %Lu bytes "
8978                                 "%Lu\n", rec->start, rec->nr);
8979                         ret = -EINVAL;
8980                         goto out;
8981                 }
8982         }
8983
8984         /*
8985          * I don't think this can happen currently as we'll abort() if we catch
8986          * this case higher up, but in case somebody removes that we still can't
8987          * deal with it properly here yet, so just bail out of that's the case.
8988          */
8989         if (best->bytenr != rec->start) {
8990                 fprintf(stderr, "Extent start and backref starts don't match, "
8991                         "please use btrfs-image on this file system and send "
8992                         "it to a btrfs developer so they can make fsck fix "
8993                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8994                         rec->start, rec->nr);
8995                 ret = -EINVAL;
8996                 goto out;
8997         }
8998
8999         /*
9000          * Ok great we all agreed on an extent record, let's go find the real
9001          * references and fix up the ones that don't match.
9002          */
9003         rbtree_postorder_for_each_entry_safe(back, tmp,
9004                                              &rec->backref_tree, node) {
9005                 if (back->full_backref || !back->is_data)
9006                         continue;
9007
9008                 dback = to_data_backref(back);
9009
9010                 /*
9011                  * Still ignoring backrefs that don't have a real ref attached
9012                  * to them.
9013                  */
9014                 if (dback->found_ref == 0)
9015                         continue;
9016
9017                 if (dback->bytes == best->bytes &&
9018                     dback->disk_bytenr == best->bytenr)
9019                         continue;
9020
9021                 ret = repair_ref(info, path, dback, best);
9022                 if (ret)
9023                         goto out;
9024         }
9025
9026         /*
9027          * Ok we messed with the actual refs, which means we need to drop our
9028          * entire cache and go back and rescan.  I know this is a huge pain and
9029          * adds a lot of extra work, but it's the only way to be safe.  Once all
9030          * the backrefs agree we may not need to do anything to the extent
9031          * record itself.
9032          */
9033         ret = -EAGAIN;
9034 out:
9035         while (!list_empty(&entries)) {
9036                 entry = list_entry(entries.next, struct extent_entry, list);
9037                 list_del_init(&entry->list);
9038                 free(entry);
9039         }
9040         return ret;
9041 }
9042
9043 static int process_duplicates(struct cache_tree *extent_cache,
9044                               struct extent_record *rec)
9045 {
9046         struct extent_record *good, *tmp;
9047         struct cache_extent *cache;
9048         int ret;
9049
9050         /*
9051          * If we found a extent record for this extent then return, or if we
9052          * have more than one duplicate we are likely going to need to delete
9053          * something.
9054          */
9055         if (rec->found_rec || rec->num_duplicates > 1)
9056                 return 0;
9057
9058         /* Shouldn't happen but just in case */
9059         BUG_ON(!rec->num_duplicates);
9060
9061         /*
9062          * So this happens if we end up with a backref that doesn't match the
9063          * actual extent entry.  So either the backref is bad or the extent
9064          * entry is bad.  Either way we want to have the extent_record actually
9065          * reflect what we found in the extent_tree, so we need to take the
9066          * duplicate out and use that as the extent_record since the only way we
9067          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9068          */
9069         remove_cache_extent(extent_cache, &rec->cache);
9070
9071         good = to_extent_record(rec->dups.next);
9072         list_del_init(&good->list);
9073         INIT_LIST_HEAD(&good->backrefs);
9074         INIT_LIST_HEAD(&good->dups);
9075         good->cache.start = good->start;
9076         good->cache.size = good->nr;
9077         good->content_checked = 0;
9078         good->owner_ref_checked = 0;
9079         good->num_duplicates = 0;
9080         good->refs = rec->refs;
9081         list_splice_init(&rec->backrefs, &good->backrefs);
9082         while (1) {
9083                 cache = lookup_cache_extent(extent_cache, good->start,
9084                                             good->nr);
9085                 if (!cache)
9086                         break;
9087                 tmp = container_of(cache, struct extent_record, cache);
9088
9089                 /*
9090                  * If we find another overlapping extent and it's found_rec is
9091                  * set then it's a duplicate and we need to try and delete
9092                  * something.
9093                  */
9094                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9095                         if (list_empty(&good->list))
9096                                 list_add_tail(&good->list,
9097                                               &duplicate_extents);
9098                         good->num_duplicates += tmp->num_duplicates + 1;
9099                         list_splice_init(&tmp->dups, &good->dups);
9100                         list_del_init(&tmp->list);
9101                         list_add_tail(&tmp->list, &good->dups);
9102                         remove_cache_extent(extent_cache, &tmp->cache);
9103                         continue;
9104                 }
9105
9106                 /*
9107                  * Ok we have another non extent item backed extent rec, so lets
9108                  * just add it to this extent and carry on like we did above.
9109                  */
9110                 good->refs += tmp->refs;
9111                 list_splice_init(&tmp->backrefs, &good->backrefs);
9112                 remove_cache_extent(extent_cache, &tmp->cache);
9113                 free(tmp);
9114         }
9115         ret = insert_cache_extent(extent_cache, &good->cache);
9116         BUG_ON(ret);
9117         free(rec);
9118         return good->num_duplicates ? 0 : 1;
9119 }
9120
9121 static int delete_duplicate_records(struct btrfs_root *root,
9122                                     struct extent_record *rec)
9123 {
9124         struct btrfs_trans_handle *trans;
9125         LIST_HEAD(delete_list);
9126         struct btrfs_path path;
9127         struct extent_record *tmp, *good, *n;
9128         int nr_del = 0;
9129         int ret = 0, err;
9130         struct btrfs_key key;
9131
9132         btrfs_init_path(&path);
9133
9134         good = rec;
9135         /* Find the record that covers all of the duplicates. */
9136         list_for_each_entry(tmp, &rec->dups, list) {
9137                 if (good->start < tmp->start)
9138                         continue;
9139                 if (good->nr > tmp->nr)
9140                         continue;
9141
9142                 if (tmp->start + tmp->nr < good->start + good->nr) {
9143                         fprintf(stderr, "Ok we have overlapping extents that "
9144                                 "aren't completely covered by each other, this "
9145                                 "is going to require more careful thought.  "
9146                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9147                                 tmp->start, tmp->nr, good->start, good->nr);
9148                         abort();
9149                 }
9150                 good = tmp;
9151         }
9152
9153         if (good != rec)
9154                 list_add_tail(&rec->list, &delete_list);
9155
9156         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9157                 if (tmp == good)
9158                         continue;
9159                 list_move_tail(&tmp->list, &delete_list);
9160         }
9161
9162         root = root->fs_info->extent_root;
9163         trans = btrfs_start_transaction(root, 1);
9164         if (IS_ERR(trans)) {
9165                 ret = PTR_ERR(trans);
9166                 goto out;
9167         }
9168
9169         list_for_each_entry(tmp, &delete_list, list) {
9170                 if (tmp->found_rec == 0)
9171                         continue;
9172                 key.objectid = tmp->start;
9173                 key.type = BTRFS_EXTENT_ITEM_KEY;
9174                 key.offset = tmp->nr;
9175
9176                 /* Shouldn't happen but just in case */
9177                 if (tmp->metadata) {
9178                         fprintf(stderr, "Well this shouldn't happen, extent "
9179                                 "record overlaps but is metadata? "
9180                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9181                         abort();
9182                 }
9183
9184                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9185                 if (ret) {
9186                         if (ret > 0)
9187                                 ret = -EINVAL;
9188                         break;
9189                 }
9190                 ret = btrfs_del_item(trans, root, &path);
9191                 if (ret)
9192                         break;
9193                 btrfs_release_path(&path);
9194                 nr_del++;
9195         }
9196         err = btrfs_commit_transaction(trans, root);
9197         if (err && !ret)
9198                 ret = err;
9199 out:
9200         while (!list_empty(&delete_list)) {
9201                 tmp = to_extent_record(delete_list.next);
9202                 list_del_init(&tmp->list);
9203                 if (tmp == rec)
9204                         continue;
9205                 free(tmp);
9206         }
9207
9208         while (!list_empty(&rec->dups)) {
9209                 tmp = to_extent_record(rec->dups.next);
9210                 list_del_init(&tmp->list);
9211                 free(tmp);
9212         }
9213
9214         btrfs_release_path(&path);
9215
9216         if (!ret && !nr_del)
9217                 rec->num_duplicates = 0;
9218
9219         return ret ? ret : nr_del;
9220 }
9221
9222 static int find_possible_backrefs(struct btrfs_fs_info *info,
9223                                   struct btrfs_path *path,
9224                                   struct cache_tree *extent_cache,
9225                                   struct extent_record *rec)
9226 {
9227         struct btrfs_root *root;
9228         struct extent_backref *back, *tmp;
9229         struct data_backref *dback;
9230         struct cache_extent *cache;
9231         struct btrfs_file_extent_item *fi;
9232         struct btrfs_key key;
9233         u64 bytenr, bytes;
9234         int ret;
9235
9236         rbtree_postorder_for_each_entry_safe(back, tmp,
9237                                              &rec->backref_tree, node) {
9238                 /* Don't care about full backrefs (poor unloved backrefs) */
9239                 if (back->full_backref || !back->is_data)
9240                         continue;
9241
9242                 dback = to_data_backref(back);
9243
9244                 /* We found this one, we don't need to do a lookup */
9245                 if (dback->found_ref)
9246                         continue;
9247
9248                 key.objectid = dback->root;
9249                 key.type = BTRFS_ROOT_ITEM_KEY;
9250                 key.offset = (u64)-1;
9251
9252                 root = btrfs_read_fs_root(info, &key);
9253
9254                 /* No root, definitely a bad ref, skip */
9255                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9256                         continue;
9257                 /* Other err, exit */
9258                 if (IS_ERR(root))
9259                         return PTR_ERR(root);
9260
9261                 key.objectid = dback->owner;
9262                 key.type = BTRFS_EXTENT_DATA_KEY;
9263                 key.offset = dback->offset;
9264                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9265                 if (ret) {
9266                         btrfs_release_path(path);
9267                         if (ret < 0)
9268                                 return ret;
9269                         /* Didn't find it, we can carry on */
9270                         ret = 0;
9271                         continue;
9272                 }
9273
9274                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9275                                     struct btrfs_file_extent_item);
9276                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9277                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9278                 btrfs_release_path(path);
9279                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9280                 if (cache) {
9281                         struct extent_record *tmp;
9282                         tmp = container_of(cache, struct extent_record, cache);
9283
9284                         /*
9285                          * If we found an extent record for the bytenr for this
9286                          * particular backref then we can't add it to our
9287                          * current extent record.  We only want to add backrefs
9288                          * that don't have a corresponding extent item in the
9289                          * extent tree since they likely belong to this record
9290                          * and we need to fix it if it doesn't match bytenrs.
9291                          */
9292                         if  (tmp->found_rec)
9293                                 continue;
9294                 }
9295
9296                 dback->found_ref += 1;
9297                 dback->disk_bytenr = bytenr;
9298                 dback->bytes = bytes;
9299
9300                 /*
9301                  * Set this so the verify backref code knows not to trust the
9302                  * values in this backref.
9303                  */
9304                 back->broken = 1;
9305         }
9306
9307         return 0;
9308 }
9309
9310 /*
9311  * Record orphan data ref into corresponding root.
9312  *
9313  * Return 0 if the extent item contains data ref and recorded.
9314  * Return 1 if the extent item contains no useful data ref
9315  *   On that case, it may contains only shared_dataref or metadata backref
9316  *   or the file extent exists(this should be handled by the extent bytenr
9317  *   recovery routine)
9318  * Return <0 if something goes wrong.
9319  */
9320 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9321                                       struct extent_record *rec)
9322 {
9323         struct btrfs_key key;
9324         struct btrfs_root *dest_root;
9325         struct extent_backref *back, *tmp;
9326         struct data_backref *dback;
9327         struct orphan_data_extent *orphan;
9328         struct btrfs_path path;
9329         int recorded_data_ref = 0;
9330         int ret = 0;
9331
9332         if (rec->metadata)
9333                 return 1;
9334         btrfs_init_path(&path);
9335         rbtree_postorder_for_each_entry_safe(back, tmp,
9336                                              &rec->backref_tree, node) {
9337                 if (back->full_backref || !back->is_data ||
9338                     !back->found_extent_tree)
9339                         continue;
9340                 dback = to_data_backref(back);
9341                 if (dback->found_ref)
9342                         continue;
9343                 key.objectid = dback->root;
9344                 key.type = BTRFS_ROOT_ITEM_KEY;
9345                 key.offset = (u64)-1;
9346
9347                 dest_root = btrfs_read_fs_root(fs_info, &key);
9348
9349                 /* For non-exist root we just skip it */
9350                 if (IS_ERR(dest_root) || !dest_root)
9351                         continue;
9352
9353                 key.objectid = dback->owner;
9354                 key.type = BTRFS_EXTENT_DATA_KEY;
9355                 key.offset = dback->offset;
9356
9357                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9358                 btrfs_release_path(&path);
9359                 /*
9360                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9361                  * we need to record it for inode/file extent rebuild.
9362                  * For ret > 0, we record it only for file extent rebuild.
9363                  * For ret == 0, the file extent exists but only bytenr
9364                  * mismatch, let the original bytenr fix routine to handle,
9365                  * don't record it.
9366                  */
9367                 if (ret == 0)
9368                         continue;
9369                 ret = 0;
9370                 orphan = malloc(sizeof(*orphan));
9371                 if (!orphan) {
9372                         ret = -ENOMEM;
9373                         goto out;
9374                 }
9375                 INIT_LIST_HEAD(&orphan->list);
9376                 orphan->root = dback->root;
9377                 orphan->objectid = dback->owner;
9378                 orphan->offset = dback->offset;
9379                 orphan->disk_bytenr = rec->cache.start;
9380                 orphan->disk_len = rec->cache.size;
9381                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9382                 recorded_data_ref = 1;
9383         }
9384 out:
9385         btrfs_release_path(&path);
9386         if (!ret)
9387                 return !recorded_data_ref;
9388         else
9389                 return ret;
9390 }
9391
9392 /*
9393  * when an incorrect extent item is found, this will delete
9394  * all of the existing entries for it and recreate them
9395  * based on what the tree scan found.
9396  */
9397 static int fixup_extent_refs(struct btrfs_fs_info *info,
9398                              struct cache_tree *extent_cache,
9399                              struct extent_record *rec)
9400 {
9401         struct btrfs_trans_handle *trans = NULL;
9402         int ret;
9403         struct btrfs_path path;
9404         struct cache_extent *cache;
9405         struct extent_backref *back, *tmp;
9406         int allocated = 0;
9407         u64 flags = 0;
9408
9409         if (rec->flag_block_full_backref)
9410                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9411
9412         btrfs_init_path(&path);
9413         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9414                 /*
9415                  * Sometimes the backrefs themselves are so broken they don't
9416                  * get attached to any meaningful rec, so first go back and
9417                  * check any of our backrefs that we couldn't find and throw
9418                  * them into the list if we find the backref so that
9419                  * verify_backrefs can figure out what to do.
9420                  */
9421                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9422                 if (ret < 0)
9423                         goto out;
9424         }
9425
9426         /* step one, make sure all of the backrefs agree */
9427         ret = verify_backrefs(info, &path, rec);
9428         if (ret < 0)
9429                 goto out;
9430
9431         trans = btrfs_start_transaction(info->extent_root, 1);
9432         if (IS_ERR(trans)) {
9433                 ret = PTR_ERR(trans);
9434                 goto out;
9435         }
9436
9437         /* step two, delete all the existing records */
9438         ret = delete_extent_records(trans, info->extent_root, &path,
9439                                     rec->start);
9440
9441         if (ret < 0)
9442                 goto out;
9443
9444         /* was this block corrupt?  If so, don't add references to it */
9445         cache = lookup_cache_extent(info->corrupt_blocks,
9446                                     rec->start, rec->max_size);
9447         if (cache) {
9448                 ret = 0;
9449                 goto out;
9450         }
9451
9452         /* step three, recreate all the refs we did find */
9453         rbtree_postorder_for_each_entry_safe(back, tmp,
9454                                              &rec->backref_tree, node) {
9455                 /*
9456                  * if we didn't find any references, don't create a
9457                  * new extent record
9458                  */
9459                 if (!back->found_ref)
9460                         continue;
9461
9462                 rec->bad_full_backref = 0;
9463                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9464                 allocated = 1;
9465
9466                 if (ret)
9467                         goto out;
9468         }
9469 out:
9470         if (trans) {
9471                 int err = btrfs_commit_transaction(trans, info->extent_root);
9472                 if (!ret)
9473                         ret = err;
9474         }
9475
9476         if (!ret)
9477                 fprintf(stderr, "Repaired extent references for %llu\n",
9478                                 (unsigned long long)rec->start);
9479
9480         btrfs_release_path(&path);
9481         return ret;
9482 }
9483
9484 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9485                               struct extent_record *rec)
9486 {
9487         struct btrfs_trans_handle *trans;
9488         struct btrfs_root *root = fs_info->extent_root;
9489         struct btrfs_path path;
9490         struct btrfs_extent_item *ei;
9491         struct btrfs_key key;
9492         u64 flags;
9493         int ret = 0;
9494
9495         key.objectid = rec->start;
9496         if (rec->metadata) {
9497                 key.type = BTRFS_METADATA_ITEM_KEY;
9498                 key.offset = rec->info_level;
9499         } else {
9500                 key.type = BTRFS_EXTENT_ITEM_KEY;
9501                 key.offset = rec->max_size;
9502         }
9503
9504         trans = btrfs_start_transaction(root, 0);
9505         if (IS_ERR(trans))
9506                 return PTR_ERR(trans);
9507
9508         btrfs_init_path(&path);
9509         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9510         if (ret < 0) {
9511                 btrfs_release_path(&path);
9512                 btrfs_commit_transaction(trans, root);
9513                 return ret;
9514         } else if (ret) {
9515                 fprintf(stderr, "Didn't find extent for %llu\n",
9516                         (unsigned long long)rec->start);
9517                 btrfs_release_path(&path);
9518                 btrfs_commit_transaction(trans, root);
9519                 return -ENOENT;
9520         }
9521
9522         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9523                             struct btrfs_extent_item);
9524         flags = btrfs_extent_flags(path.nodes[0], ei);
9525         if (rec->flag_block_full_backref) {
9526                 fprintf(stderr, "setting full backref on %llu\n",
9527                         (unsigned long long)key.objectid);
9528                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9529         } else {
9530                 fprintf(stderr, "clearing full backref on %llu\n",
9531                         (unsigned long long)key.objectid);
9532                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9533         }
9534         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9535         btrfs_mark_buffer_dirty(path.nodes[0]);
9536         btrfs_release_path(&path);
9537         ret = btrfs_commit_transaction(trans, root);
9538         if (!ret)
9539                 fprintf(stderr, "Repaired extent flags for %llu\n",
9540                                 (unsigned long long)rec->start);
9541
9542         return ret;
9543 }
9544
9545 /* right now we only prune from the extent allocation tree */
9546 static int prune_one_block(struct btrfs_trans_handle *trans,
9547                            struct btrfs_fs_info *info,
9548                            struct btrfs_corrupt_block *corrupt)
9549 {
9550         int ret;
9551         struct btrfs_path path;
9552         struct extent_buffer *eb;
9553         u64 found;
9554         int slot;
9555         int nritems;
9556         int level = corrupt->level + 1;
9557
9558         btrfs_init_path(&path);
9559 again:
9560         /* we want to stop at the parent to our busted block */
9561         path.lowest_level = level;
9562
9563         ret = btrfs_search_slot(trans, info->extent_root,
9564                                 &corrupt->key, &path, -1, 1);
9565
9566         if (ret < 0)
9567                 goto out;
9568
9569         eb = path.nodes[level];
9570         if (!eb) {
9571                 ret = -ENOENT;
9572                 goto out;
9573         }
9574
9575         /*
9576          * hopefully the search gave us the block we want to prune,
9577          * lets try that first
9578          */
9579         slot = path.slots[level];
9580         found =  btrfs_node_blockptr(eb, slot);
9581         if (found == corrupt->cache.start)
9582                 goto del_ptr;
9583
9584         nritems = btrfs_header_nritems(eb);
9585
9586         /* the search failed, lets scan this node and hope we find it */
9587         for (slot = 0; slot < nritems; slot++) {
9588                 found =  btrfs_node_blockptr(eb, slot);
9589                 if (found == corrupt->cache.start)
9590                         goto del_ptr;
9591         }
9592         /*
9593          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9594          * to this block
9595          */
9596         if (eb == info->extent_root->node) {
9597                 ret = -ENOENT;
9598                 goto out;
9599         } else {
9600                 level++;
9601                 btrfs_release_path(&path);
9602                 goto again;
9603         }
9604
9605 del_ptr:
9606         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9607         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9608
9609 out:
9610         btrfs_release_path(&path);
9611         return ret;
9612 }
9613
9614 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9615 {
9616         struct btrfs_trans_handle *trans = NULL;
9617         struct cache_extent *cache;
9618         struct btrfs_corrupt_block *corrupt;
9619
9620         while (1) {
9621                 cache = search_cache_extent(info->corrupt_blocks, 0);
9622                 if (!cache)
9623                         break;
9624                 if (!trans) {
9625                         trans = btrfs_start_transaction(info->extent_root, 1);
9626                         if (IS_ERR(trans))
9627                                 return PTR_ERR(trans);
9628                 }
9629                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9630                 prune_one_block(trans, info, corrupt);
9631                 remove_cache_extent(info->corrupt_blocks, cache);
9632         }
9633         if (trans)
9634                 return btrfs_commit_transaction(trans, info->extent_root);
9635         return 0;
9636 }
9637
9638 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9639 {
9640         struct btrfs_block_group_cache *cache;
9641         u64 start, end;
9642         int ret;
9643
9644         while (1) {
9645                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9646                                             &start, &end, EXTENT_DIRTY);
9647                 if (ret)
9648                         break;
9649                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9650         }
9651
9652         start = 0;
9653         while (1) {
9654                 cache = btrfs_lookup_first_block_group(fs_info, start);
9655                 if (!cache)
9656                         break;
9657                 if (cache->cached)
9658                         cache->cached = 0;
9659                 start = cache->key.objectid + cache->key.offset;
9660         }
9661 }
9662
9663 static int check_extent_refs(struct btrfs_root *root,
9664                              struct cache_tree *extent_cache)
9665 {
9666         struct extent_record *rec;
9667         struct cache_extent *cache;
9668         int ret = 0;
9669         int had_dups = 0;
9670
9671         if (repair) {
9672                 /*
9673                  * if we're doing a repair, we have to make sure
9674                  * we don't allocate from the problem extents.
9675                  * In the worst case, this will be all the
9676                  * extents in the FS
9677                  */
9678                 cache = search_cache_extent(extent_cache, 0);
9679                 while(cache) {
9680                         rec = container_of(cache, struct extent_record, cache);
9681                         set_extent_dirty(root->fs_info->excluded_extents,
9682                                          rec->start,
9683                                          rec->start + rec->max_size - 1);
9684                         cache = next_cache_extent(cache);
9685                 }
9686
9687                 /* pin down all the corrupted blocks too */
9688                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9689                 while(cache) {
9690                         set_extent_dirty(root->fs_info->excluded_extents,
9691                                          cache->start,
9692                                          cache->start + cache->size - 1);
9693                         cache = next_cache_extent(cache);
9694                 }
9695                 prune_corrupt_blocks(root->fs_info);
9696                 reset_cached_block_groups(root->fs_info);
9697         }
9698
9699         reset_cached_block_groups(root->fs_info);
9700
9701         /*
9702          * We need to delete any duplicate entries we find first otherwise we
9703          * could mess up the extent tree when we have backrefs that actually
9704          * belong to a different extent item and not the weird duplicate one.
9705          */
9706         while (repair && !list_empty(&duplicate_extents)) {
9707                 rec = to_extent_record(duplicate_extents.next);
9708                 list_del_init(&rec->list);
9709
9710                 /* Sometimes we can find a backref before we find an actual
9711                  * extent, so we need to process it a little bit to see if there
9712                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9713                  * if this is a backref screwup.  If we need to delete stuff
9714                  * process_duplicates() will return 0, otherwise it will return
9715                  * 1 and we
9716                  */
9717                 if (process_duplicates(extent_cache, rec))
9718                         continue;
9719                 ret = delete_duplicate_records(root, rec);
9720                 if (ret < 0)
9721                         return ret;
9722                 /*
9723                  * delete_duplicate_records will return the number of entries
9724                  * deleted, so if it's greater than 0 then we know we actually
9725                  * did something and we need to remove.
9726                  */
9727                 if (ret)
9728                         had_dups = 1;
9729         }
9730
9731         if (had_dups)
9732                 return -EAGAIN;
9733
9734         while(1) {
9735                 int cur_err = 0;
9736                 int fix = 0;
9737
9738                 cache = search_cache_extent(extent_cache, 0);
9739                 if (!cache)
9740                         break;
9741                 rec = container_of(cache, struct extent_record, cache);
9742                 if (rec->num_duplicates) {
9743                         fprintf(stderr, "extent item %llu has multiple extent "
9744                                 "items\n", (unsigned long long)rec->start);
9745                         cur_err = 1;
9746                 }
9747
9748                 if (rec->refs != rec->extent_item_refs) {
9749                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9750                                 (unsigned long long)rec->start,
9751                                 (unsigned long long)rec->nr);
9752                         fprintf(stderr, "extent item %llu, found %llu\n",
9753                                 (unsigned long long)rec->extent_item_refs,
9754                                 (unsigned long long)rec->refs);
9755                         ret = record_orphan_data_extents(root->fs_info, rec);
9756                         if (ret < 0)
9757                                 goto repair_abort;
9758                         fix = ret;
9759                         cur_err = 1;
9760                 }
9761                 if (all_backpointers_checked(rec, 1)) {
9762                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9763                                 (unsigned long long)rec->start,
9764                                 (unsigned long long)rec->nr);
9765                         fix = 1;
9766                         cur_err = 1;
9767                 }
9768                 if (!rec->owner_ref_checked) {
9769                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9770                                 (unsigned long long)rec->start,
9771                                 (unsigned long long)rec->nr);
9772                         fix = 1;
9773                         cur_err = 1;
9774                 }
9775
9776                 if (repair && fix) {
9777                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9778                         if (ret)
9779                                 goto repair_abort;
9780                 }
9781
9782
9783                 if (rec->bad_full_backref) {
9784                         fprintf(stderr, "bad full backref, on [%llu]\n",
9785                                 (unsigned long long)rec->start);
9786                         if (repair) {
9787                                 ret = fixup_extent_flags(root->fs_info, rec);
9788                                 if (ret)
9789                                         goto repair_abort;
9790                                 fix = 1;
9791                         }
9792                         cur_err = 1;
9793                 }
9794                 /*
9795                  * Although it's not a extent ref's problem, we reuse this
9796                  * routine for error reporting.
9797                  * No repair function yet.
9798                  */
9799                 if (rec->crossing_stripes) {
9800                         fprintf(stderr,
9801                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9802                                 rec->start, rec->start + rec->max_size);
9803                         cur_err = 1;
9804                 }
9805
9806                 if (rec->wrong_chunk_type) {
9807                         fprintf(stderr,
9808                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9809                                 rec->start, rec->start + rec->max_size);
9810                         cur_err = 1;
9811                 }
9812
9813                 remove_cache_extent(extent_cache, cache);
9814                 free_all_extent_backrefs(rec);
9815                 if (!init_extent_tree && repair && (!cur_err || fix))
9816                         clear_extent_dirty(root->fs_info->excluded_extents,
9817                                            rec->start,
9818                                            rec->start + rec->max_size - 1);
9819                 free(rec);
9820         }
9821 repair_abort:
9822         if (repair) {
9823                 if (ret && ret != -EAGAIN) {
9824                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9825                         exit(1);
9826                 } else if (!ret) {
9827                         struct btrfs_trans_handle *trans;
9828
9829                         root = root->fs_info->extent_root;
9830                         trans = btrfs_start_transaction(root, 1);
9831                         if (IS_ERR(trans)) {
9832                                 ret = PTR_ERR(trans);
9833                                 goto repair_abort;
9834                         }
9835
9836                         ret = btrfs_fix_block_accounting(trans, root);
9837                         if (ret)
9838                                 goto repair_abort;
9839                         ret = btrfs_commit_transaction(trans, root);
9840                         if (ret)
9841                                 goto repair_abort;
9842                 }
9843                 return ret;
9844         }
9845         return 0;
9846 }
9847
9848 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9849 {
9850         u64 stripe_size;
9851
9852         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9853                 stripe_size = length;
9854                 stripe_size /= num_stripes;
9855         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9856                 stripe_size = length * 2;
9857                 stripe_size /= num_stripes;
9858         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9859                 stripe_size = length;
9860                 stripe_size /= (num_stripes - 1);
9861         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9862                 stripe_size = length;
9863                 stripe_size /= (num_stripes - 2);
9864         } else {
9865                 stripe_size = length;
9866         }
9867         return stripe_size;
9868 }
9869
9870 /*
9871  * Check the chunk with its block group/dev list ref:
9872  * Return 0 if all refs seems valid.
9873  * Return 1 if part of refs seems valid, need later check for rebuild ref
9874  * like missing block group and needs to search extent tree to rebuild them.
9875  * Return -1 if essential refs are missing and unable to rebuild.
9876  */
9877 static int check_chunk_refs(struct chunk_record *chunk_rec,
9878                             struct block_group_tree *block_group_cache,
9879                             struct device_extent_tree *dev_extent_cache,
9880                             int silent)
9881 {
9882         struct cache_extent *block_group_item;
9883         struct block_group_record *block_group_rec;
9884         struct cache_extent *dev_extent_item;
9885         struct device_extent_record *dev_extent_rec;
9886         u64 devid;
9887         u64 offset;
9888         u64 length;
9889         int metadump_v2 = 0;
9890         int i;
9891         int ret = 0;
9892
9893         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9894                                                chunk_rec->offset,
9895                                                chunk_rec->length);
9896         if (block_group_item) {
9897                 block_group_rec = container_of(block_group_item,
9898                                                struct block_group_record,
9899                                                cache);
9900                 if (chunk_rec->length != block_group_rec->offset ||
9901                     chunk_rec->offset != block_group_rec->objectid ||
9902                     (!metadump_v2 &&
9903                      chunk_rec->type_flags != block_group_rec->flags)) {
9904                         if (!silent)
9905                                 fprintf(stderr,
9906                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9907                                         chunk_rec->objectid,
9908                                         chunk_rec->type,
9909                                         chunk_rec->offset,
9910                                         chunk_rec->length,
9911                                         chunk_rec->offset,
9912                                         chunk_rec->type_flags,
9913                                         block_group_rec->objectid,
9914                                         block_group_rec->type,
9915                                         block_group_rec->offset,
9916                                         block_group_rec->offset,
9917                                         block_group_rec->objectid,
9918                                         block_group_rec->flags);
9919                         ret = -1;
9920                 } else {
9921                         list_del_init(&block_group_rec->list);
9922                         chunk_rec->bg_rec = block_group_rec;
9923                 }
9924         } else {
9925                 if (!silent)
9926                         fprintf(stderr,
9927                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9928                                 chunk_rec->objectid,
9929                                 chunk_rec->type,
9930                                 chunk_rec->offset,
9931                                 chunk_rec->length,
9932                                 chunk_rec->offset,
9933                                 chunk_rec->type_flags);
9934                 ret = 1;
9935         }
9936
9937         if (metadump_v2)
9938                 return ret;
9939
9940         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9941                                     chunk_rec->num_stripes);
9942         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9943                 devid = chunk_rec->stripes[i].devid;
9944                 offset = chunk_rec->stripes[i].offset;
9945                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9946                                                        devid, offset, length);
9947                 if (dev_extent_item) {
9948                         dev_extent_rec = container_of(dev_extent_item,
9949                                                 struct device_extent_record,
9950                                                 cache);
9951                         if (dev_extent_rec->objectid != devid ||
9952                             dev_extent_rec->offset != offset ||
9953                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9954                             dev_extent_rec->length != length) {
9955                                 if (!silent)
9956                                         fprintf(stderr,
9957                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9958                                                 chunk_rec->objectid,
9959                                                 chunk_rec->type,
9960                                                 chunk_rec->offset,
9961                                                 chunk_rec->stripes[i].devid,
9962                                                 chunk_rec->stripes[i].offset,
9963                                                 dev_extent_rec->objectid,
9964                                                 dev_extent_rec->offset,
9965                                                 dev_extent_rec->length);
9966                                 ret = -1;
9967                         } else {
9968                                 list_move(&dev_extent_rec->chunk_list,
9969                                           &chunk_rec->dextents);
9970                         }
9971                 } else {
9972                         if (!silent)
9973                                 fprintf(stderr,
9974                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9975                                         chunk_rec->objectid,
9976                                         chunk_rec->type,
9977                                         chunk_rec->offset,
9978                                         chunk_rec->stripes[i].devid,
9979                                         chunk_rec->stripes[i].offset);
9980                         ret = -1;
9981                 }
9982         }
9983         return ret;
9984 }
9985
9986 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9987 int check_chunks(struct cache_tree *chunk_cache,
9988                  struct block_group_tree *block_group_cache,
9989                  struct device_extent_tree *dev_extent_cache,
9990                  struct list_head *good, struct list_head *bad,
9991                  struct list_head *rebuild, int silent)
9992 {
9993         struct cache_extent *chunk_item;
9994         struct chunk_record *chunk_rec;
9995         struct block_group_record *bg_rec;
9996         struct device_extent_record *dext_rec;
9997         int err;
9998         int ret = 0;
9999
10000         chunk_item = first_cache_extent(chunk_cache);
10001         while (chunk_item) {
10002                 chunk_rec = container_of(chunk_item, struct chunk_record,
10003                                          cache);
10004                 err = check_chunk_refs(chunk_rec, block_group_cache,
10005                                        dev_extent_cache, silent);
10006                 if (err < 0)
10007                         ret = err;
10008                 if (err == 0 && good)
10009                         list_add_tail(&chunk_rec->list, good);
10010                 if (err > 0 && rebuild)
10011                         list_add_tail(&chunk_rec->list, rebuild);
10012                 if (err < 0 && bad)
10013                         list_add_tail(&chunk_rec->list, bad);
10014                 chunk_item = next_cache_extent(chunk_item);
10015         }
10016
10017         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10018                 if (!silent)
10019                         fprintf(stderr,
10020                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10021                                 bg_rec->objectid,
10022                                 bg_rec->offset,
10023                                 bg_rec->flags);
10024                 if (!ret)
10025                         ret = 1;
10026         }
10027
10028         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10029                             chunk_list) {
10030                 if (!silent)
10031                         fprintf(stderr,
10032                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10033                                 dext_rec->objectid,
10034                                 dext_rec->offset,
10035                                 dext_rec->length);
10036                 if (!ret)
10037                         ret = 1;
10038         }
10039         return ret;
10040 }
10041
10042
10043 static int check_device_used(struct device_record *dev_rec,
10044                              struct device_extent_tree *dext_cache)
10045 {
10046         struct cache_extent *cache;
10047         struct device_extent_record *dev_extent_rec;
10048         u64 total_byte = 0;
10049
10050         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10051         while (cache) {
10052                 dev_extent_rec = container_of(cache,
10053                                               struct device_extent_record,
10054                                               cache);
10055                 if (dev_extent_rec->objectid != dev_rec->devid)
10056                         break;
10057
10058                 list_del_init(&dev_extent_rec->device_list);
10059                 total_byte += dev_extent_rec->length;
10060                 cache = next_cache_extent(cache);
10061         }
10062
10063         if (total_byte != dev_rec->byte_used) {
10064                 fprintf(stderr,
10065                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10066                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10067                         dev_rec->type, dev_rec->offset);
10068                 return -1;
10069         } else {
10070                 return 0;
10071         }
10072 }
10073
10074 /* check btrfs_dev_item -> btrfs_dev_extent */
10075 static int check_devices(struct rb_root *dev_cache,
10076                          struct device_extent_tree *dev_extent_cache)
10077 {
10078         struct rb_node *dev_node;
10079         struct device_record *dev_rec;
10080         struct device_extent_record *dext_rec;
10081         int err;
10082         int ret = 0;
10083
10084         dev_node = rb_first(dev_cache);
10085         while (dev_node) {
10086                 dev_rec = container_of(dev_node, struct device_record, node);
10087                 err = check_device_used(dev_rec, dev_extent_cache);
10088                 if (err)
10089                         ret = err;
10090
10091                 dev_node = rb_next(dev_node);
10092         }
10093         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10094                             device_list) {
10095                 fprintf(stderr,
10096                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10097                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10098                 if (!ret)
10099                         ret = 1;
10100         }
10101         return ret;
10102 }
10103
10104 static int add_root_item_to_list(struct list_head *head,
10105                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10106                                   u8 level, u8 drop_level,
10107                                   struct btrfs_key *drop_key)
10108 {
10109
10110         struct root_item_record *ri_rec;
10111         ri_rec = malloc(sizeof(*ri_rec));
10112         if (!ri_rec)
10113                 return -ENOMEM;
10114         ri_rec->bytenr = bytenr;
10115         ri_rec->objectid = objectid;
10116         ri_rec->level = level;
10117         ri_rec->drop_level = drop_level;
10118         ri_rec->last_snapshot = last_snapshot;
10119         if (drop_key)
10120                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10121         list_add_tail(&ri_rec->list, head);
10122
10123         return 0;
10124 }
10125
10126 static void free_root_item_list(struct list_head *list)
10127 {
10128         struct root_item_record *ri_rec;
10129
10130         while (!list_empty(list)) {
10131                 ri_rec = list_first_entry(list, struct root_item_record,
10132                                           list);
10133                 list_del_init(&ri_rec->list);
10134                 free(ri_rec);
10135         }
10136 }
10137
10138 static int deal_root_from_list(struct list_head *list,
10139                                struct btrfs_root *root,
10140                                struct block_info *bits,
10141                                int bits_nr,
10142                                struct cache_tree *pending,
10143                                struct cache_tree *seen,
10144                                struct cache_tree *reada,
10145                                struct cache_tree *nodes,
10146                                struct cache_tree *extent_cache,
10147                                struct cache_tree *chunk_cache,
10148                                struct rb_root *dev_cache,
10149                                struct block_group_tree *block_group_cache,
10150                                struct device_extent_tree *dev_extent_cache)
10151 {
10152         int ret = 0;
10153         u64 last;
10154
10155         while (!list_empty(list)) {
10156                 struct root_item_record *rec;
10157                 struct extent_buffer *buf;
10158                 rec = list_entry(list->next,
10159                                  struct root_item_record, list);
10160                 last = 0;
10161                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10162                 if (!extent_buffer_uptodate(buf)) {
10163                         free_extent_buffer(buf);
10164                         ret = -EIO;
10165                         break;
10166                 }
10167                 ret = add_root_to_pending(buf, extent_cache, pending,
10168                                     seen, nodes, rec->objectid);
10169                 if (ret < 0)
10170                         break;
10171                 /*
10172                  * To rebuild extent tree, we need deal with snapshot
10173                  * one by one, otherwise we deal with node firstly which
10174                  * can maximize readahead.
10175                  */
10176                 while (1) {
10177                         ret = run_next_block(root, bits, bits_nr, &last,
10178                                              pending, seen, reada, nodes,
10179                                              extent_cache, chunk_cache,
10180                                              dev_cache, block_group_cache,
10181                                              dev_extent_cache, rec);
10182                         if (ret != 0)
10183                                 break;
10184                 }
10185                 free_extent_buffer(buf);
10186                 list_del(&rec->list);
10187                 free(rec);
10188                 if (ret < 0)
10189                         break;
10190         }
10191         while (ret >= 0) {
10192                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10193                                      reada, nodes, extent_cache, chunk_cache,
10194                                      dev_cache, block_group_cache,
10195                                      dev_extent_cache, NULL);
10196                 if (ret != 0) {
10197                         if (ret > 0)
10198                                 ret = 0;
10199                         break;
10200                 }
10201         }
10202         return ret;
10203 }
10204
10205 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10206 {
10207         struct rb_root dev_cache;
10208         struct cache_tree chunk_cache;
10209         struct block_group_tree block_group_cache;
10210         struct device_extent_tree dev_extent_cache;
10211         struct cache_tree extent_cache;
10212         struct cache_tree seen;
10213         struct cache_tree pending;
10214         struct cache_tree reada;
10215         struct cache_tree nodes;
10216         struct extent_io_tree excluded_extents;
10217         struct cache_tree corrupt_blocks;
10218         struct btrfs_path path;
10219         struct btrfs_key key;
10220         struct btrfs_key found_key;
10221         int ret, err = 0;
10222         struct block_info *bits;
10223         int bits_nr;
10224         struct extent_buffer *leaf;
10225         int slot;
10226         struct btrfs_root_item ri;
10227         struct list_head dropping_trees;
10228         struct list_head normal_trees;
10229         struct btrfs_root *root1;
10230         struct btrfs_root *root;
10231         u64 objectid;
10232         u8 level;
10233
10234         root = fs_info->fs_root;
10235         dev_cache = RB_ROOT;
10236         cache_tree_init(&chunk_cache);
10237         block_group_tree_init(&block_group_cache);
10238         device_extent_tree_init(&dev_extent_cache);
10239
10240         cache_tree_init(&extent_cache);
10241         cache_tree_init(&seen);
10242         cache_tree_init(&pending);
10243         cache_tree_init(&nodes);
10244         cache_tree_init(&reada);
10245         cache_tree_init(&corrupt_blocks);
10246         extent_io_tree_init(&excluded_extents);
10247         INIT_LIST_HEAD(&dropping_trees);
10248         INIT_LIST_HEAD(&normal_trees);
10249
10250         if (repair) {
10251                 fs_info->excluded_extents = &excluded_extents;
10252                 fs_info->fsck_extent_cache = &extent_cache;
10253                 fs_info->free_extent_hook = free_extent_hook;
10254                 fs_info->corrupt_blocks = &corrupt_blocks;
10255         }
10256
10257         bits_nr = 1024;
10258         bits = malloc(bits_nr * sizeof(struct block_info));
10259         if (!bits) {
10260                 perror("malloc");
10261                 exit(1);
10262         }
10263
10264         if (ctx.progress_enabled) {
10265                 ctx.tp = TASK_EXTENTS;
10266                 task_start(ctx.info);
10267         }
10268
10269 again:
10270         root1 = fs_info->tree_root;
10271         level = btrfs_header_level(root1->node);
10272         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10273                                     root1->node->start, 0, level, 0, NULL);
10274         if (ret < 0)
10275                 goto out;
10276         root1 = fs_info->chunk_root;
10277         level = btrfs_header_level(root1->node);
10278         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10279                                     root1->node->start, 0, level, 0, NULL);
10280         if (ret < 0)
10281                 goto out;
10282         btrfs_init_path(&path);
10283         key.offset = 0;
10284         key.objectid = 0;
10285         key.type = BTRFS_ROOT_ITEM_KEY;
10286         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10287         if (ret < 0)
10288                 goto out;
10289         while(1) {
10290                 leaf = path.nodes[0];
10291                 slot = path.slots[0];
10292                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10293                         ret = btrfs_next_leaf(root, &path);
10294                         if (ret != 0)
10295                                 break;
10296                         leaf = path.nodes[0];
10297                         slot = path.slots[0];
10298                 }
10299                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10300                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10301                         unsigned long offset;
10302                         u64 last_snapshot;
10303
10304                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10305                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10306                         last_snapshot = btrfs_root_last_snapshot(&ri);
10307                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10308                                 level = btrfs_root_level(&ri);
10309                                 ret = add_root_item_to_list(&normal_trees,
10310                                                 found_key.objectid,
10311                                                 btrfs_root_bytenr(&ri),
10312                                                 last_snapshot, level,
10313                                                 0, NULL);
10314                                 if (ret < 0)
10315                                         goto out;
10316                         } else {
10317                                 level = btrfs_root_level(&ri);
10318                                 objectid = found_key.objectid;
10319                                 btrfs_disk_key_to_cpu(&found_key,
10320                                                       &ri.drop_progress);
10321                                 ret = add_root_item_to_list(&dropping_trees,
10322                                                 objectid,
10323                                                 btrfs_root_bytenr(&ri),
10324                                                 last_snapshot, level,
10325                                                 ri.drop_level, &found_key);
10326                                 if (ret < 0)
10327                                         goto out;
10328                         }
10329                 }
10330                 path.slots[0]++;
10331         }
10332         btrfs_release_path(&path);
10333
10334         /*
10335          * check_block can return -EAGAIN if it fixes something, please keep
10336          * this in mind when dealing with return values from these functions, if
10337          * we get -EAGAIN we want to fall through and restart the loop.
10338          */
10339         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10340                                   &seen, &reada, &nodes, &extent_cache,
10341                                   &chunk_cache, &dev_cache, &block_group_cache,
10342                                   &dev_extent_cache);
10343         if (ret < 0) {
10344                 if (ret == -EAGAIN)
10345                         goto loop;
10346                 goto out;
10347         }
10348         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10349                                   &pending, &seen, &reada, &nodes,
10350                                   &extent_cache, &chunk_cache, &dev_cache,
10351                                   &block_group_cache, &dev_extent_cache);
10352         if (ret < 0) {
10353                 if (ret == -EAGAIN)
10354                         goto loop;
10355                 goto out;
10356         }
10357
10358         ret = check_chunks(&chunk_cache, &block_group_cache,
10359                            &dev_extent_cache, NULL, NULL, NULL, 0);
10360         if (ret) {
10361                 if (ret == -EAGAIN)
10362                         goto loop;
10363                 err = ret;
10364         }
10365
10366         ret = check_extent_refs(root, &extent_cache);
10367         if (ret < 0) {
10368                 if (ret == -EAGAIN)
10369                         goto loop;
10370                 goto out;
10371         }
10372
10373         ret = check_devices(&dev_cache, &dev_extent_cache);
10374         if (ret && err)
10375                 ret = err;
10376
10377 out:
10378         task_stop(ctx.info);
10379         if (repair) {
10380                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10381                 extent_io_tree_cleanup(&excluded_extents);
10382                 fs_info->fsck_extent_cache = NULL;
10383                 fs_info->free_extent_hook = NULL;
10384                 fs_info->corrupt_blocks = NULL;
10385                 fs_info->excluded_extents = NULL;
10386         }
10387         free(bits);
10388         free_chunk_cache_tree(&chunk_cache);
10389         free_device_cache_tree(&dev_cache);
10390         free_block_group_tree(&block_group_cache);
10391         free_device_extent_tree(&dev_extent_cache);
10392         free_extent_cache_tree(&seen);
10393         free_extent_cache_tree(&pending);
10394         free_extent_cache_tree(&reada);
10395         free_extent_cache_tree(&nodes);
10396         free_root_item_list(&normal_trees);
10397         free_root_item_list(&dropping_trees);
10398         return ret;
10399 loop:
10400         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10401         free_extent_cache_tree(&seen);
10402         free_extent_cache_tree(&pending);
10403         free_extent_cache_tree(&reada);
10404         free_extent_cache_tree(&nodes);
10405         free_chunk_cache_tree(&chunk_cache);
10406         free_block_group_tree(&block_group_cache);
10407         free_device_cache_tree(&dev_cache);
10408         free_device_extent_tree(&dev_extent_cache);
10409         free_extent_record_cache(&extent_cache);
10410         free_root_item_list(&normal_trees);
10411         free_root_item_list(&dropping_trees);
10412         extent_io_tree_cleanup(&excluded_extents);
10413         goto again;
10414 }
10415
10416 /*
10417  * Check backrefs of a tree block given by @bytenr or @eb.
10418  *
10419  * @root:       the root containing the @bytenr or @eb
10420  * @eb:         tree block extent buffer, can be NULL
10421  * @bytenr:     bytenr of the tree block to search
10422  * @level:      tree level of the tree block
10423  * @owner:      owner of the tree block
10424  *
10425  * Return >0 for any error found and output error message
10426  * Return 0 for no error found
10427  */
10428 static int check_tree_block_ref(struct btrfs_root *root,
10429                                 struct extent_buffer *eb, u64 bytenr,
10430                                 int level, u64 owner)
10431 {
10432         struct btrfs_key key;
10433         struct btrfs_root *extent_root = root->fs_info->extent_root;
10434         struct btrfs_path path;
10435         struct btrfs_extent_item *ei;
10436         struct btrfs_extent_inline_ref *iref;
10437         struct extent_buffer *leaf;
10438         unsigned long end;
10439         unsigned long ptr;
10440         int slot;
10441         int skinny_level;
10442         int type;
10443         u32 nodesize = root->fs_info->nodesize;
10444         u32 item_size;
10445         u64 offset;
10446         int tree_reloc_root = 0;
10447         int found_ref = 0;
10448         int err = 0;
10449         int ret;
10450
10451         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10452             btrfs_header_bytenr(root->node) == bytenr)
10453                 tree_reloc_root = 1;
10454
10455         btrfs_init_path(&path);
10456         key.objectid = bytenr;
10457         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10458                 key.type = BTRFS_METADATA_ITEM_KEY;
10459         else
10460                 key.type = BTRFS_EXTENT_ITEM_KEY;
10461         key.offset = (u64)-1;
10462
10463         /* Search for the backref in extent tree */
10464         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10465         if (ret < 0) {
10466                 err |= BACKREF_MISSING;
10467                 goto out;
10468         }
10469         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10470         if (ret) {
10471                 err |= BACKREF_MISSING;
10472                 goto out;
10473         }
10474
10475         leaf = path.nodes[0];
10476         slot = path.slots[0];
10477         btrfs_item_key_to_cpu(leaf, &key, slot);
10478
10479         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10480
10481         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10482                 skinny_level = (int)key.offset;
10483                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10484         } else {
10485                 struct btrfs_tree_block_info *info;
10486
10487                 info = (struct btrfs_tree_block_info *)(ei + 1);
10488                 skinny_level = btrfs_tree_block_level(leaf, info);
10489                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10490         }
10491
10492         if (eb) {
10493                 u64 header_gen;
10494                 u64 extent_gen;
10495
10496                 if (!(btrfs_extent_flags(leaf, ei) &
10497                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10498                         error(
10499                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10500                                 key.objectid, nodesize,
10501                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10502                         err = BACKREF_MISMATCH;
10503                 }
10504                 header_gen = btrfs_header_generation(eb);
10505                 extent_gen = btrfs_extent_generation(leaf, ei);
10506                 if (header_gen != extent_gen) {
10507                         error(
10508         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10509                                 key.objectid, nodesize, header_gen,
10510                                 extent_gen);
10511                         err = BACKREF_MISMATCH;
10512                 }
10513                 if (level != skinny_level) {
10514                         error(
10515                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10516                                 key.objectid, nodesize, level, skinny_level);
10517                         err = BACKREF_MISMATCH;
10518                 }
10519                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10520                         error(
10521                         "extent[%llu %u] is referred by other roots than %llu",
10522                                 key.objectid, nodesize, root->objectid);
10523                         err = BACKREF_MISMATCH;
10524                 }
10525         }
10526
10527         /*
10528          * Iterate the extent/metadata item to find the exact backref
10529          */
10530         item_size = btrfs_item_size_nr(leaf, slot);
10531         ptr = (unsigned long)iref;
10532         end = (unsigned long)ei + item_size;
10533         while (ptr < end) {
10534                 iref = (struct btrfs_extent_inline_ref *)ptr;
10535                 type = btrfs_extent_inline_ref_type(leaf, iref);
10536                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10537
10538                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10539                         (offset == root->objectid || offset == owner)) {
10540                         found_ref = 1;
10541                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10542                         /*
10543                          * Backref of tree reloc root points to itself, no need
10544                          * to check backref any more.
10545                          */
10546                         if (tree_reloc_root)
10547                                 found_ref = 1;
10548                         else
10549                         /* Check if the backref points to valid referencer */
10550                                 found_ref = !check_tree_block_ref(root, NULL,
10551                                                 offset, level + 1, owner);
10552                 }
10553
10554                 if (found_ref)
10555                         break;
10556                 ptr += btrfs_extent_inline_ref_size(type);
10557         }
10558
10559         /*
10560          * Inlined extent item doesn't have what we need, check
10561          * TREE_BLOCK_REF_KEY
10562          */
10563         if (!found_ref) {
10564                 btrfs_release_path(&path);
10565                 key.objectid = bytenr;
10566                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10567                 key.offset = root->objectid;
10568
10569                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10570                 if (!ret)
10571                         found_ref = 1;
10572         }
10573         if (!found_ref)
10574                 err |= BACKREF_MISSING;
10575 out:
10576         btrfs_release_path(&path);
10577         if (eb && (err & BACKREF_MISSING))
10578                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10579                         bytenr, nodesize, owner, level);
10580         return err;
10581 }
10582
10583 /*
10584  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10585  *
10586  * Return >0 any error found and output error message
10587  * Return 0 for no error found
10588  */
10589 static int check_extent_data_item(struct btrfs_root *root,
10590                                   struct extent_buffer *eb, int slot)
10591 {
10592         struct btrfs_file_extent_item *fi;
10593         struct btrfs_path path;
10594         struct btrfs_root *extent_root = root->fs_info->extent_root;
10595         struct btrfs_key fi_key;
10596         struct btrfs_key dbref_key;
10597         struct extent_buffer *leaf;
10598         struct btrfs_extent_item *ei;
10599         struct btrfs_extent_inline_ref *iref;
10600         struct btrfs_extent_data_ref *dref;
10601         u64 owner;
10602         u64 disk_bytenr;
10603         u64 disk_num_bytes;
10604         u64 extent_num_bytes;
10605         u64 extent_flags;
10606         u32 item_size;
10607         unsigned long end;
10608         unsigned long ptr;
10609         int type;
10610         u64 ref_root;
10611         int found_dbackref = 0;
10612         int err = 0;
10613         int ret;
10614
10615         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10616         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10617
10618         /* Nothing to check for hole and inline data extents */
10619         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10620             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10621                 return 0;
10622
10623         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10624         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10625         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10626
10627         /* Check unaligned disk_num_bytes and num_bytes */
10628         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10629                 error(
10630 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10631                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10632                         root->fs_info->sectorsize);
10633                 err |= BYTES_UNALIGNED;
10634         } else {
10635                 data_bytes_allocated += disk_num_bytes;
10636         }
10637         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10638                 error(
10639 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10640                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10641                         root->fs_info->sectorsize);
10642                 err |= BYTES_UNALIGNED;
10643         } else {
10644                 data_bytes_referenced += extent_num_bytes;
10645         }
10646         owner = btrfs_header_owner(eb);
10647
10648         /* Check the extent item of the file extent in extent tree */
10649         btrfs_init_path(&path);
10650         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10651         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10652         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10653
10654         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10655         if (ret)
10656                 goto out;
10657
10658         leaf = path.nodes[0];
10659         slot = path.slots[0];
10660         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10661
10662         extent_flags = btrfs_extent_flags(leaf, ei);
10663
10664         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10665                 error(
10666                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10667                     disk_bytenr, disk_num_bytes,
10668                     BTRFS_EXTENT_FLAG_DATA);
10669                 err |= BACKREF_MISMATCH;
10670         }
10671
10672         /* Check data backref inside that extent item */
10673         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10674         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10675         ptr = (unsigned long)iref;
10676         end = (unsigned long)ei + item_size;
10677         while (ptr < end) {
10678                 iref = (struct btrfs_extent_inline_ref *)ptr;
10679                 type = btrfs_extent_inline_ref_type(leaf, iref);
10680                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10681
10682                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10683                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10684                         if (ref_root == owner || ref_root == root->objectid)
10685                                 found_dbackref = 1;
10686                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10687                         found_dbackref = !check_tree_block_ref(root, NULL,
10688                                 btrfs_extent_inline_ref_offset(leaf, iref),
10689                                 0, owner);
10690                 }
10691
10692                 if (found_dbackref)
10693                         break;
10694                 ptr += btrfs_extent_inline_ref_size(type);
10695         }
10696
10697         if (!found_dbackref) {
10698                 btrfs_release_path(&path);
10699
10700                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10701                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10702                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10703                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10704                                 fi_key.objectid, fi_key.offset);
10705
10706                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10707                                         &dbref_key, &path, 0, 0);
10708                 if (!ret) {
10709                         found_dbackref = 1;
10710                         goto out;
10711                 }
10712
10713                 btrfs_release_path(&path);
10714
10715                 /*
10716                  * Neither inlined nor EXTENT_DATA_REF found, try
10717                  * SHARED_DATA_REF as last chance.
10718                  */
10719                 dbref_key.objectid = disk_bytenr;
10720                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10721                 dbref_key.offset = eb->start;
10722
10723                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10724                                         &dbref_key, &path, 0, 0);
10725                 if (!ret) {
10726                         found_dbackref = 1;
10727                         goto out;
10728                 }
10729         }
10730
10731 out:
10732         if (!found_dbackref)
10733                 err |= BACKREF_MISSING;
10734         btrfs_release_path(&path);
10735         if (err & BACKREF_MISSING) {
10736                 error("data extent[%llu %llu] backref lost",
10737                       disk_bytenr, disk_num_bytes);
10738         }
10739         return err;
10740 }
10741
10742 /*
10743  * Get real tree block level for the case like shared block
10744  * Return >= 0 as tree level
10745  * Return <0 for error
10746  */
10747 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10748 {
10749         struct extent_buffer *eb;
10750         struct btrfs_path path;
10751         struct btrfs_key key;
10752         struct btrfs_extent_item *ei;
10753         u64 flags;
10754         u64 transid;
10755         u8 backref_level;
10756         u8 header_level;
10757         int ret;
10758
10759         /* Search extent tree for extent generation and level */
10760         key.objectid = bytenr;
10761         key.type = BTRFS_METADATA_ITEM_KEY;
10762         key.offset = (u64)-1;
10763
10764         btrfs_init_path(&path);
10765         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10766         if (ret < 0)
10767                 goto release_out;
10768         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10769         if (ret < 0)
10770                 goto release_out;
10771         if (ret > 0) {
10772                 ret = -ENOENT;
10773                 goto release_out;
10774         }
10775
10776         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10777         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10778                             struct btrfs_extent_item);
10779         flags = btrfs_extent_flags(path.nodes[0], ei);
10780         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10781                 ret = -ENOENT;
10782                 goto release_out;
10783         }
10784
10785         /* Get transid for later read_tree_block() check */
10786         transid = btrfs_extent_generation(path.nodes[0], ei);
10787
10788         /* Get backref level as one source */
10789         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10790                 backref_level = key.offset;
10791         } else {
10792                 struct btrfs_tree_block_info *info;
10793
10794                 info = (struct btrfs_tree_block_info *)(ei + 1);
10795                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10796         }
10797         btrfs_release_path(&path);
10798
10799         /* Get level from tree block as an alternative source */
10800         eb = read_tree_block(fs_info, bytenr, transid);
10801         if (!extent_buffer_uptodate(eb)) {
10802                 free_extent_buffer(eb);
10803                 return -EIO;
10804         }
10805         header_level = btrfs_header_level(eb);
10806         free_extent_buffer(eb);
10807
10808         if (header_level != backref_level)
10809                 return -EIO;
10810         return header_level;
10811
10812 release_out:
10813         btrfs_release_path(&path);
10814         return ret;
10815 }
10816
10817 /*
10818  * Check if a tree block backref is valid (points to a valid tree block)
10819  * if level == -1, level will be resolved
10820  * Return >0 for any error found and print error message
10821  */
10822 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10823                                     u64 bytenr, int level)
10824 {
10825         struct btrfs_root *root;
10826         struct btrfs_key key;
10827         struct btrfs_path path;
10828         struct extent_buffer *eb;
10829         struct extent_buffer *node;
10830         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10831         int err = 0;
10832         int ret;
10833
10834         /* Query level for level == -1 special case */
10835         if (level == -1)
10836                 level = query_tree_block_level(fs_info, bytenr);
10837         if (level < 0) {
10838                 err |= REFERENCER_MISSING;
10839                 goto out;
10840         }
10841
10842         key.objectid = root_id;
10843         key.type = BTRFS_ROOT_ITEM_KEY;
10844         key.offset = (u64)-1;
10845
10846         root = btrfs_read_fs_root(fs_info, &key);
10847         if (IS_ERR(root)) {
10848                 err |= REFERENCER_MISSING;
10849                 goto out;
10850         }
10851
10852         /* Read out the tree block to get item/node key */
10853         eb = read_tree_block(fs_info, bytenr, 0);
10854         if (!extent_buffer_uptodate(eb)) {
10855                 err |= REFERENCER_MISSING;
10856                 free_extent_buffer(eb);
10857                 goto out;
10858         }
10859
10860         /* Empty tree, no need to check key */
10861         if (!btrfs_header_nritems(eb) && !level) {
10862                 free_extent_buffer(eb);
10863                 goto out;
10864         }
10865
10866         if (level)
10867                 btrfs_node_key_to_cpu(eb, &key, 0);
10868         else
10869                 btrfs_item_key_to_cpu(eb, &key, 0);
10870
10871         free_extent_buffer(eb);
10872
10873         btrfs_init_path(&path);
10874         path.lowest_level = level;
10875         /* Search with the first key, to ensure we can reach it */
10876         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10877         if (ret < 0) {
10878                 err |= REFERENCER_MISSING;
10879                 goto release_out;
10880         }
10881
10882         node = path.nodes[level];
10883         if (btrfs_header_bytenr(node) != bytenr) {
10884                 error(
10885         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10886                         bytenr, nodesize, bytenr,
10887                         btrfs_header_bytenr(node));
10888                 err |= REFERENCER_MISMATCH;
10889         }
10890         if (btrfs_header_level(node) != level) {
10891                 error(
10892         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10893                         bytenr, nodesize, level,
10894                         btrfs_header_level(node));
10895                 err |= REFERENCER_MISMATCH;
10896         }
10897
10898 release_out:
10899         btrfs_release_path(&path);
10900 out:
10901         if (err & REFERENCER_MISSING) {
10902                 if (level < 0)
10903                         error("extent [%llu %d] lost referencer (owner: %llu)",
10904                                 bytenr, nodesize, root_id);
10905                 else
10906                         error(
10907                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10908                                 bytenr, nodesize, root_id, level);
10909         }
10910
10911         return err;
10912 }
10913
10914 /*
10915  * Check if tree block @eb is tree reloc root.
10916  * Return 0 if it's not or any problem happens
10917  * Return 1 if it's a tree reloc root
10918  */
10919 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10920                                  struct extent_buffer *eb)
10921 {
10922         struct btrfs_root *tree_reloc_root;
10923         struct btrfs_key key;
10924         u64 bytenr = btrfs_header_bytenr(eb);
10925         u64 owner = btrfs_header_owner(eb);
10926         int ret = 0;
10927
10928         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10929         key.offset = owner;
10930         key.type = BTRFS_ROOT_ITEM_KEY;
10931
10932         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10933         if (IS_ERR(tree_reloc_root))
10934                 return 0;
10935
10936         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10937                 ret = 1;
10938         btrfs_free_fs_root(tree_reloc_root);
10939         return ret;
10940 }
10941
10942 /*
10943  * Check referencer for shared block backref
10944  * If level == -1, this function will resolve the level.
10945  */
10946 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10947                                      u64 parent, u64 bytenr, int level)
10948 {
10949         struct extent_buffer *eb;
10950         u32 nr;
10951         int found_parent = 0;
10952         int i;
10953
10954         eb = read_tree_block(fs_info, parent, 0);
10955         if (!extent_buffer_uptodate(eb))
10956                 goto out;
10957
10958         if (level == -1)
10959                 level = query_tree_block_level(fs_info, bytenr);
10960         if (level < 0)
10961                 goto out;
10962
10963         /* It's possible it's a tree reloc root */
10964         if (parent == bytenr) {
10965                 if (is_tree_reloc_root(fs_info, eb))
10966                         found_parent = 1;
10967                 goto out;
10968         }
10969
10970         if (level + 1 != btrfs_header_level(eb))
10971                 goto out;
10972
10973         nr = btrfs_header_nritems(eb);
10974         for (i = 0; i < nr; i++) {
10975                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10976                         found_parent = 1;
10977                         break;
10978                 }
10979         }
10980 out:
10981         free_extent_buffer(eb);
10982         if (!found_parent) {
10983                 error(
10984         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10985                         bytenr, fs_info->nodesize, parent, level);
10986                 return REFERENCER_MISSING;
10987         }
10988         return 0;
10989 }
10990
10991 /*
10992  * Check referencer for normal (inlined) data ref
10993  * If len == 0, it will be resolved by searching in extent tree
10994  */
10995 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10996                                      u64 root_id, u64 objectid, u64 offset,
10997                                      u64 bytenr, u64 len, u32 count)
10998 {
10999         struct btrfs_root *root;
11000         struct btrfs_root *extent_root = fs_info->extent_root;
11001         struct btrfs_key key;
11002         struct btrfs_path path;
11003         struct extent_buffer *leaf;
11004         struct btrfs_file_extent_item *fi;
11005         u32 found_count = 0;
11006         int slot;
11007         int ret = 0;
11008
11009         if (!len) {
11010                 key.objectid = bytenr;
11011                 key.type = BTRFS_EXTENT_ITEM_KEY;
11012                 key.offset = (u64)-1;
11013
11014                 btrfs_init_path(&path);
11015                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11016                 if (ret < 0)
11017                         goto out;
11018                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11019                 if (ret)
11020                         goto out;
11021                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11022                 if (key.objectid != bytenr ||
11023                     key.type != BTRFS_EXTENT_ITEM_KEY)
11024                         goto out;
11025                 len = key.offset;
11026                 btrfs_release_path(&path);
11027         }
11028         key.objectid = root_id;
11029         key.type = BTRFS_ROOT_ITEM_KEY;
11030         key.offset = (u64)-1;
11031         btrfs_init_path(&path);
11032
11033         root = btrfs_read_fs_root(fs_info, &key);
11034         if (IS_ERR(root))
11035                 goto out;
11036
11037         key.objectid = objectid;
11038         key.type = BTRFS_EXTENT_DATA_KEY;
11039         /*
11040          * It can be nasty as data backref offset is
11041          * file offset - file extent offset, which is smaller or
11042          * equal to original backref offset.  The only special case is
11043          * overflow.  So we need to special check and do further search.
11044          */
11045         key.offset = offset & (1ULL << 63) ? 0 : offset;
11046
11047         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11048         if (ret < 0)
11049                 goto out;
11050
11051         /*
11052          * Search afterwards to get correct one
11053          * NOTE: As we must do a comprehensive check on the data backref to
11054          * make sure the dref count also matches, we must iterate all file
11055          * extents for that inode.
11056          */
11057         while (1) {
11058                 leaf = path.nodes[0];
11059                 slot = path.slots[0];
11060
11061                 if (slot >= btrfs_header_nritems(leaf))
11062                         goto next;
11063                 btrfs_item_key_to_cpu(leaf, &key, slot);
11064                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11065                         break;
11066                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11067                 /*
11068                  * Except normal disk bytenr and disk num bytes, we still
11069                  * need to do extra check on dbackref offset as
11070                  * dbackref offset = file_offset - file_extent_offset
11071                  */
11072                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11073                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11074                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11075                     offset)
11076                         found_count++;
11077
11078 next:
11079                 ret = btrfs_next_item(root, &path);
11080                 if (ret)
11081                         break;
11082         }
11083 out:
11084         btrfs_release_path(&path);
11085         if (found_count != count) {
11086                 error(
11087 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11088                         bytenr, len, root_id, objectid, offset, count, found_count);
11089                 return REFERENCER_MISSING;
11090         }
11091         return 0;
11092 }
11093
11094 /*
11095  * Check if the referencer of a shared data backref exists
11096  */
11097 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11098                                      u64 parent, u64 bytenr)
11099 {
11100         struct extent_buffer *eb;
11101         struct btrfs_key key;
11102         struct btrfs_file_extent_item *fi;
11103         u32 nr;
11104         int found_parent = 0;
11105         int i;
11106
11107         eb = read_tree_block(fs_info, parent, 0);
11108         if (!extent_buffer_uptodate(eb))
11109                 goto out;
11110
11111         nr = btrfs_header_nritems(eb);
11112         for (i = 0; i < nr; i++) {
11113                 btrfs_item_key_to_cpu(eb, &key, i);
11114                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11115                         continue;
11116
11117                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11118                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11119                         continue;
11120
11121                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11122                         found_parent = 1;
11123                         break;
11124                 }
11125         }
11126
11127 out:
11128         free_extent_buffer(eb);
11129         if (!found_parent) {
11130                 error("shared extent %llu referencer lost (parent: %llu)",
11131                         bytenr, parent);
11132                 return REFERENCER_MISSING;
11133         }
11134         return 0;
11135 }
11136
11137 /*
11138  * This function will check a given extent item, including its backref and
11139  * itself (like crossing stripe boundary and type)
11140  *
11141  * Since we don't use extent_record anymore, introduce new error bit
11142  */
11143 static int check_extent_item(struct btrfs_fs_info *fs_info,
11144                              struct extent_buffer *eb, int slot)
11145 {
11146         struct btrfs_extent_item *ei;
11147         struct btrfs_extent_inline_ref *iref;
11148         struct btrfs_extent_data_ref *dref;
11149         unsigned long end;
11150         unsigned long ptr;
11151         int type;
11152         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11153         u32 item_size = btrfs_item_size_nr(eb, slot);
11154         u64 flags;
11155         u64 offset;
11156         int metadata = 0;
11157         int level;
11158         struct btrfs_key key;
11159         int ret;
11160         int err = 0;
11161
11162         btrfs_item_key_to_cpu(eb, &key, slot);
11163         if (key.type == BTRFS_EXTENT_ITEM_KEY)
11164                 bytes_used += key.offset;
11165         else
11166                 bytes_used += nodesize;
11167
11168         if (item_size < sizeof(*ei)) {
11169                 /*
11170                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11171                  * old thing when on disk format is still un-determined.
11172                  * No need to care about it anymore
11173                  */
11174                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11175                 return -ENOTTY;
11176         }
11177
11178         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11179         flags = btrfs_extent_flags(eb, ei);
11180
11181         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11182                 metadata = 1;
11183         if (metadata && check_crossing_stripes(global_info, key.objectid,
11184                                                eb->len)) {
11185                 error("bad metadata [%llu, %llu) crossing stripe boundary",
11186                       key.objectid, key.objectid + nodesize);
11187                 err |= CROSSING_STRIPE_BOUNDARY;
11188         }
11189
11190         ptr = (unsigned long)(ei + 1);
11191
11192         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11193                 /* Old EXTENT_ITEM metadata */
11194                 struct btrfs_tree_block_info *info;
11195
11196                 info = (struct btrfs_tree_block_info *)ptr;
11197                 level = btrfs_tree_block_level(eb, info);
11198                 ptr += sizeof(struct btrfs_tree_block_info);
11199         } else {
11200                 /* New METADATA_ITEM */
11201                 level = key.offset;
11202         }
11203         end = (unsigned long)ei + item_size;
11204
11205 next:
11206         /* Reached extent item end normally */
11207         if (ptr == end)
11208                 goto out;
11209
11210         /* Beyond extent item end, wrong item size */
11211         if (ptr > end) {
11212                 err |= ITEM_SIZE_MISMATCH;
11213                 error("extent item at bytenr %llu slot %d has wrong size",
11214                         eb->start, slot);
11215                 goto out;
11216         }
11217
11218         /* Now check every backref in this extent item */
11219         iref = (struct btrfs_extent_inline_ref *)ptr;
11220         type = btrfs_extent_inline_ref_type(eb, iref);
11221         offset = btrfs_extent_inline_ref_offset(eb, iref);
11222         switch (type) {
11223         case BTRFS_TREE_BLOCK_REF_KEY:
11224                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11225                                                level);
11226                 err |= ret;
11227                 break;
11228         case BTRFS_SHARED_BLOCK_REF_KEY:
11229                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11230                                                  level);
11231                 err |= ret;
11232                 break;
11233         case BTRFS_EXTENT_DATA_REF_KEY:
11234                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11235                 ret = check_extent_data_backref(fs_info,
11236                                 btrfs_extent_data_ref_root(eb, dref),
11237                                 btrfs_extent_data_ref_objectid(eb, dref),
11238                                 btrfs_extent_data_ref_offset(eb, dref),
11239                                 key.objectid, key.offset,
11240                                 btrfs_extent_data_ref_count(eb, dref));
11241                 err |= ret;
11242                 break;
11243         case BTRFS_SHARED_DATA_REF_KEY:
11244                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11245                 err |= ret;
11246                 break;
11247         default:
11248                 error("extent[%llu %d %llu] has unknown ref type: %d",
11249                         key.objectid, key.type, key.offset, type);
11250                 err |= UNKNOWN_TYPE;
11251                 goto out;
11252         }
11253
11254         ptr += btrfs_extent_inline_ref_size(type);
11255         goto next;
11256
11257 out:
11258         return err;
11259 }
11260
11261 /*
11262  * Check if a dev extent item is referred correctly by its chunk
11263  */
11264 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11265                                  struct extent_buffer *eb, int slot)
11266 {
11267         struct btrfs_root *chunk_root = fs_info->chunk_root;
11268         struct btrfs_dev_extent *ptr;
11269         struct btrfs_path path;
11270         struct btrfs_key chunk_key;
11271         struct btrfs_key devext_key;
11272         struct btrfs_chunk *chunk;
11273         struct extent_buffer *l;
11274         int num_stripes;
11275         u64 length;
11276         int i;
11277         int found_chunk = 0;
11278         int ret;
11279
11280         btrfs_item_key_to_cpu(eb, &devext_key, slot);
11281         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11282         length = btrfs_dev_extent_length(eb, ptr);
11283
11284         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11285         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11286         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11287
11288         btrfs_init_path(&path);
11289         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11290         if (ret)
11291                 goto out;
11292
11293         l = path.nodes[0];
11294         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11295         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11296                                       chunk_key.offset);
11297         if (ret < 0)
11298                 goto out;
11299
11300         if (btrfs_stripe_length(fs_info, l, chunk) != length)
11301                 goto out;
11302
11303         num_stripes = btrfs_chunk_num_stripes(l, chunk);
11304         for (i = 0; i < num_stripes; i++) {
11305                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11306                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11307
11308                 if (devid == devext_key.objectid &&
11309                     offset == devext_key.offset) {
11310                         found_chunk = 1;
11311                         break;
11312                 }
11313         }
11314 out:
11315         btrfs_release_path(&path);
11316         if (!found_chunk) {
11317                 error(
11318                 "device extent[%llu, %llu, %llu] did not find the related chunk",
11319                         devext_key.objectid, devext_key.offset, length);
11320                 return REFERENCER_MISSING;
11321         }
11322         return 0;
11323 }
11324
11325 /*
11326  * Check if the used space is correct with the dev item
11327  */
11328 static int check_dev_item(struct btrfs_fs_info *fs_info,
11329                           struct extent_buffer *eb, int slot)
11330 {
11331         struct btrfs_root *dev_root = fs_info->dev_root;
11332         struct btrfs_dev_item *dev_item;
11333         struct btrfs_path path;
11334         struct btrfs_key key;
11335         struct btrfs_dev_extent *ptr;
11336         u64 dev_id;
11337         u64 used;
11338         u64 total = 0;
11339         int ret;
11340
11341         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11342         dev_id = btrfs_device_id(eb, dev_item);
11343         used = btrfs_device_bytes_used(eb, dev_item);
11344
11345         key.objectid = dev_id;
11346         key.type = BTRFS_DEV_EXTENT_KEY;
11347         key.offset = 0;
11348
11349         btrfs_init_path(&path);
11350         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11351         if (ret < 0) {
11352                 btrfs_item_key_to_cpu(eb, &key, slot);
11353                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11354                         key.objectid, key.type, key.offset);
11355                 btrfs_release_path(&path);
11356                 return REFERENCER_MISSING;
11357         }
11358
11359         /* Iterate dev_extents to calculate the used space of a device */
11360         while (1) {
11361                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11362                         goto next;
11363
11364                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11365                 if (key.objectid > dev_id)
11366                         break;
11367                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11368                         goto next;
11369
11370                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11371                                      struct btrfs_dev_extent);
11372                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11373 next:
11374                 ret = btrfs_next_item(dev_root, &path);
11375                 if (ret)
11376                         break;
11377         }
11378         btrfs_release_path(&path);
11379
11380         if (used != total) {
11381                 btrfs_item_key_to_cpu(eb, &key, slot);
11382                 error(
11383 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11384                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11385                         BTRFS_DEV_EXTENT_KEY, dev_id);
11386                 return ACCOUNTING_MISMATCH;
11387         }
11388         return 0;
11389 }
11390
11391 /*
11392  * Check a block group item with its referener (chunk) and its used space
11393  * with extent/metadata item
11394  */
11395 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11396                                   struct extent_buffer *eb, int slot)
11397 {
11398         struct btrfs_root *extent_root = fs_info->extent_root;
11399         struct btrfs_root *chunk_root = fs_info->chunk_root;
11400         struct btrfs_block_group_item *bi;
11401         struct btrfs_block_group_item bg_item;
11402         struct btrfs_path path;
11403         struct btrfs_key bg_key;
11404         struct btrfs_key chunk_key;
11405         struct btrfs_key extent_key;
11406         struct btrfs_chunk *chunk;
11407         struct extent_buffer *leaf;
11408         struct btrfs_extent_item *ei;
11409         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11410         u64 flags;
11411         u64 bg_flags;
11412         u64 used;
11413         u64 total = 0;
11414         int ret;
11415         int err = 0;
11416
11417         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11418         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11419         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11420         used = btrfs_block_group_used(&bg_item);
11421         bg_flags = btrfs_block_group_flags(&bg_item);
11422
11423         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11424         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11425         chunk_key.offset = bg_key.objectid;
11426
11427         btrfs_init_path(&path);
11428         /* Search for the referencer chunk */
11429         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11430         if (ret) {
11431                 error(
11432                 "block group[%llu %llu] did not find the related chunk item",
11433                         bg_key.objectid, bg_key.offset);
11434                 err |= REFERENCER_MISSING;
11435         } else {
11436                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11437                                         struct btrfs_chunk);
11438                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11439                                                 bg_key.offset) {
11440                         error(
11441         "block group[%llu %llu] related chunk item length does not match",
11442                                 bg_key.objectid, bg_key.offset);
11443                         err |= REFERENCER_MISMATCH;
11444                 }
11445         }
11446         btrfs_release_path(&path);
11447
11448         /* Search from the block group bytenr */
11449         extent_key.objectid = bg_key.objectid;
11450         extent_key.type = 0;
11451         extent_key.offset = 0;
11452
11453         btrfs_init_path(&path);
11454         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11455         if (ret < 0)
11456                 goto out;
11457
11458         /* Iterate extent tree to account used space */
11459         while (1) {
11460                 leaf = path.nodes[0];
11461
11462                 /* Search slot can point to the last item beyond leaf nritems */
11463                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11464                         goto next;
11465
11466                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11467                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11468                         break;
11469
11470                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11471                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11472                         goto next;
11473                 if (extent_key.objectid < bg_key.objectid)
11474                         goto next;
11475
11476                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11477                         total += nodesize;
11478                 else
11479                         total += extent_key.offset;
11480
11481                 ei = btrfs_item_ptr(leaf, path.slots[0],
11482                                     struct btrfs_extent_item);
11483                 flags = btrfs_extent_flags(leaf, ei);
11484                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11485                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11486                                 error(
11487                         "bad extent[%llu, %llu) type mismatch with chunk",
11488                                         extent_key.objectid,
11489                                         extent_key.objectid + extent_key.offset);
11490                                 err |= CHUNK_TYPE_MISMATCH;
11491                         }
11492                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11493                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11494                                     BTRFS_BLOCK_GROUP_METADATA))) {
11495                                 error(
11496                         "bad extent[%llu, %llu) type mismatch with chunk",
11497                                         extent_key.objectid,
11498                                         extent_key.objectid + nodesize);
11499                                 err |= CHUNK_TYPE_MISMATCH;
11500                         }
11501                 }
11502 next:
11503                 ret = btrfs_next_item(extent_root, &path);
11504                 if (ret)
11505                         break;
11506         }
11507
11508 out:
11509         btrfs_release_path(&path);
11510
11511         if (total != used) {
11512                 error(
11513                 "block group[%llu %llu] used %llu but extent items used %llu",
11514                         bg_key.objectid, bg_key.offset, used, total);
11515                 err |= ACCOUNTING_MISMATCH;
11516         }
11517         return err;
11518 }
11519
11520 /*
11521  * Check a chunk item.
11522  * Including checking all referred dev_extents and block group
11523  */
11524 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11525                             struct extent_buffer *eb, int slot)
11526 {
11527         struct btrfs_root *extent_root = fs_info->extent_root;
11528         struct btrfs_root *dev_root = fs_info->dev_root;
11529         struct btrfs_path path;
11530         struct btrfs_key chunk_key;
11531         struct btrfs_key bg_key;
11532         struct btrfs_key devext_key;
11533         struct btrfs_chunk *chunk;
11534         struct extent_buffer *leaf;
11535         struct btrfs_block_group_item *bi;
11536         struct btrfs_block_group_item bg_item;
11537         struct btrfs_dev_extent *ptr;
11538         u64 length;
11539         u64 chunk_end;
11540         u64 stripe_len;
11541         u64 type;
11542         int num_stripes;
11543         u64 offset;
11544         u64 objectid;
11545         int i;
11546         int ret;
11547         int err = 0;
11548
11549         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11550         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11551         length = btrfs_chunk_length(eb, chunk);
11552         chunk_end = chunk_key.offset + length;
11553         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11554                                       chunk_key.offset);
11555         if (ret < 0) {
11556                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11557                         chunk_end);
11558                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11559                 goto out;
11560         }
11561         type = btrfs_chunk_type(eb, chunk);
11562
11563         bg_key.objectid = chunk_key.offset;
11564         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11565         bg_key.offset = length;
11566
11567         btrfs_init_path(&path);
11568         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11569         if (ret) {
11570                 error(
11571                 "chunk[%llu %llu) did not find the related block group item",
11572                         chunk_key.offset, chunk_end);
11573                 err |= REFERENCER_MISSING;
11574         } else{
11575                 leaf = path.nodes[0];
11576                 bi = btrfs_item_ptr(leaf, path.slots[0],
11577                                     struct btrfs_block_group_item);
11578                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11579                                    sizeof(bg_item));
11580                 if (btrfs_block_group_flags(&bg_item) != type) {
11581                         error(
11582 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11583                                 chunk_key.offset, chunk_end, type,
11584                                 btrfs_block_group_flags(&bg_item));
11585                         err |= REFERENCER_MISSING;
11586                 }
11587         }
11588
11589         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11590         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11591         for (i = 0; i < num_stripes; i++) {
11592                 btrfs_release_path(&path);
11593                 btrfs_init_path(&path);
11594                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11595                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11596                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11597
11598                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11599                                         0, 0);
11600                 if (ret)
11601                         goto not_match_dev;
11602
11603                 leaf = path.nodes[0];
11604                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11605                                      struct btrfs_dev_extent);
11606                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11607                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11608                 if (objectid != chunk_key.objectid ||
11609                     offset != chunk_key.offset ||
11610                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11611                         goto not_match_dev;
11612                 continue;
11613 not_match_dev:
11614                 err |= BACKREF_MISSING;
11615                 error(
11616                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11617                         chunk_key.objectid, chunk_end, i);
11618                 continue;
11619         }
11620         btrfs_release_path(&path);
11621 out:
11622         return err;
11623 }
11624
11625 /*
11626  * Main entry function to check known items and update related accounting info
11627  */
11628 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11629 {
11630         struct btrfs_fs_info *fs_info = root->fs_info;
11631         struct btrfs_key key;
11632         int slot = 0;
11633         int type;
11634         struct btrfs_extent_data_ref *dref;
11635         int ret;
11636         int err = 0;
11637
11638 next:
11639         btrfs_item_key_to_cpu(eb, &key, slot);
11640         type = key.type;
11641
11642         switch (type) {
11643         case BTRFS_EXTENT_DATA_KEY:
11644                 ret = check_extent_data_item(root, eb, slot);
11645                 err |= ret;
11646                 break;
11647         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11648                 ret = check_block_group_item(fs_info, eb, slot);
11649                 err |= ret;
11650                 break;
11651         case BTRFS_DEV_ITEM_KEY:
11652                 ret = check_dev_item(fs_info, eb, slot);
11653                 err |= ret;
11654                 break;
11655         case BTRFS_CHUNK_ITEM_KEY:
11656                 ret = check_chunk_item(fs_info, eb, slot);
11657                 err |= ret;
11658                 break;
11659         case BTRFS_DEV_EXTENT_KEY:
11660                 ret = check_dev_extent_item(fs_info, eb, slot);
11661                 err |= ret;
11662                 break;
11663         case BTRFS_EXTENT_ITEM_KEY:
11664         case BTRFS_METADATA_ITEM_KEY:
11665                 ret = check_extent_item(fs_info, eb, slot);
11666                 err |= ret;
11667                 break;
11668         case BTRFS_EXTENT_CSUM_KEY:
11669                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11670                 break;
11671         case BTRFS_TREE_BLOCK_REF_KEY:
11672                 ret = check_tree_block_backref(fs_info, key.offset,
11673                                                key.objectid, -1);
11674                 err |= ret;
11675                 break;
11676         case BTRFS_EXTENT_DATA_REF_KEY:
11677                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11678                 ret = check_extent_data_backref(fs_info,
11679                                 btrfs_extent_data_ref_root(eb, dref),
11680                                 btrfs_extent_data_ref_objectid(eb, dref),
11681                                 btrfs_extent_data_ref_offset(eb, dref),
11682                                 key.objectid, 0,
11683                                 btrfs_extent_data_ref_count(eb, dref));
11684                 err |= ret;
11685                 break;
11686         case BTRFS_SHARED_BLOCK_REF_KEY:
11687                 ret = check_shared_block_backref(fs_info, key.offset,
11688                                                  key.objectid, -1);
11689                 err |= ret;
11690                 break;
11691         case BTRFS_SHARED_DATA_REF_KEY:
11692                 ret = check_shared_data_backref(fs_info, key.offset,
11693                                                 key.objectid);
11694                 err |= ret;
11695                 break;
11696         default:
11697                 break;
11698         }
11699
11700         if (++slot < btrfs_header_nritems(eb))
11701                 goto next;
11702
11703         return err;
11704 }
11705
11706 /*
11707  * Helper function for later fs/subvol tree check.  To determine if a tree
11708  * block should be checked.
11709  * This function will ensure only the direct referencer with lowest rootid to
11710  * check a fs/subvolume tree block.
11711  *
11712  * Backref check at extent tree would detect errors like missing subvolume
11713  * tree, so we can do aggressive check to reduce duplicated checks.
11714  */
11715 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11716 {
11717         struct btrfs_root *extent_root = root->fs_info->extent_root;
11718         struct btrfs_key key;
11719         struct btrfs_path path;
11720         struct extent_buffer *leaf;
11721         int slot;
11722         struct btrfs_extent_item *ei;
11723         unsigned long ptr;
11724         unsigned long end;
11725         int type;
11726         u32 item_size;
11727         u64 offset;
11728         struct btrfs_extent_inline_ref *iref;
11729         int ret;
11730
11731         btrfs_init_path(&path);
11732         key.objectid = btrfs_header_bytenr(eb);
11733         key.type = BTRFS_METADATA_ITEM_KEY;
11734         key.offset = (u64)-1;
11735
11736         /*
11737          * Any failure in backref resolving means we can't determine
11738          * whom the tree block belongs to.
11739          * So in that case, we need to check that tree block
11740          */
11741         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11742         if (ret < 0)
11743                 goto need_check;
11744
11745         ret = btrfs_previous_extent_item(extent_root, &path,
11746                                          btrfs_header_bytenr(eb));
11747         if (ret)
11748                 goto need_check;
11749
11750         leaf = path.nodes[0];
11751         slot = path.slots[0];
11752         btrfs_item_key_to_cpu(leaf, &key, slot);
11753         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11754
11755         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11756                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11757         } else {
11758                 struct btrfs_tree_block_info *info;
11759
11760                 info = (struct btrfs_tree_block_info *)(ei + 1);
11761                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11762         }
11763
11764         item_size = btrfs_item_size_nr(leaf, slot);
11765         ptr = (unsigned long)iref;
11766         end = (unsigned long)ei + item_size;
11767         while (ptr < end) {
11768                 iref = (struct btrfs_extent_inline_ref *)ptr;
11769                 type = btrfs_extent_inline_ref_type(leaf, iref);
11770                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11771
11772                 /*
11773                  * We only check the tree block if current root is
11774                  * the lowest referencer of it.
11775                  */
11776                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11777                     offset < root->objectid) {
11778                         btrfs_release_path(&path);
11779                         return 0;
11780                 }
11781
11782                 ptr += btrfs_extent_inline_ref_size(type);
11783         }
11784         /*
11785          * Normally we should also check keyed tree block ref, but that may be
11786          * very time consuming.  Inlined ref should already make us skip a lot
11787          * of refs now.  So skip search keyed tree block ref.
11788          */
11789
11790 need_check:
11791         btrfs_release_path(&path);
11792         return 1;
11793 }
11794
11795 /*
11796  * Traversal function for tree block. We will do:
11797  * 1) Skip shared fs/subvolume tree blocks
11798  * 2) Update related bytes accounting
11799  * 3) Pre-order traversal
11800  */
11801 static int traverse_tree_block(struct btrfs_root *root,
11802                                 struct extent_buffer *node)
11803 {
11804         struct extent_buffer *eb;
11805         struct btrfs_key key;
11806         struct btrfs_key drop_key;
11807         int level;
11808         u64 nr;
11809         int i;
11810         int err = 0;
11811         int ret;
11812
11813         /*
11814          * Skip shared fs/subvolume tree block, in that case they will
11815          * be checked by referencer with lowest rootid
11816          */
11817         if (is_fstree(root->objectid) && !should_check(root, node))
11818                 return 0;
11819
11820         /* Update bytes accounting */
11821         total_btree_bytes += node->len;
11822         if (fs_root_objectid(btrfs_header_owner(node)))
11823                 total_fs_tree_bytes += node->len;
11824         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11825                 total_extent_tree_bytes += node->len;
11826
11827         /* pre-order tranversal, check itself first */
11828         level = btrfs_header_level(node);
11829         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11830                                    btrfs_header_level(node),
11831                                    btrfs_header_owner(node));
11832         err |= ret;
11833         if (err)
11834                 error(
11835         "check %s failed root %llu bytenr %llu level %d, force continue check",
11836                         level ? "node":"leaf", root->objectid,
11837                         btrfs_header_bytenr(node), btrfs_header_level(node));
11838
11839         if (!level) {
11840                 btree_space_waste += btrfs_leaf_free_space(root, node);
11841                 ret = check_leaf_items(root, node);
11842                 err |= ret;
11843                 return err;
11844         }
11845
11846         nr = btrfs_header_nritems(node);
11847         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11848         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11849                 sizeof(struct btrfs_key_ptr);
11850
11851         /* Then check all its children */
11852         for (i = 0; i < nr; i++) {
11853                 u64 blocknr = btrfs_node_blockptr(node, i);
11854
11855                 btrfs_node_key_to_cpu(node, &key, i);
11856                 if (level == root->root_item.drop_level &&
11857                     is_dropped_key(&key, &drop_key))
11858                         continue;
11859
11860                 /*
11861                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11862                  * to call the function itself.
11863                  */
11864                 eb = read_tree_block(root->fs_info, blocknr, 0);
11865                 if (extent_buffer_uptodate(eb)) {
11866                         ret = traverse_tree_block(root, eb);
11867                         err |= ret;
11868                 }
11869                 free_extent_buffer(eb);
11870         }
11871
11872         return err;
11873 }
11874
11875 /*
11876  * Low memory usage version check_chunks_and_extents.
11877  */
11878 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11879 {
11880         struct btrfs_path path;
11881         struct btrfs_key key;
11882         struct btrfs_root *root1;
11883         struct btrfs_root *root;
11884         struct btrfs_root *cur_root;
11885         int err = 0;
11886         int ret;
11887
11888         root = fs_info->fs_root;
11889
11890         root1 = root->fs_info->chunk_root;
11891         ret = traverse_tree_block(root1, root1->node);
11892         err |= ret;
11893
11894         root1 = root->fs_info->tree_root;
11895         ret = traverse_tree_block(root1, root1->node);
11896         err |= ret;
11897
11898         btrfs_init_path(&path);
11899         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11900         key.offset = 0;
11901         key.type = BTRFS_ROOT_ITEM_KEY;
11902
11903         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11904         if (ret) {
11905                 error("cannot find extent treet in tree_root");
11906                 goto out;
11907         }
11908
11909         while (1) {
11910                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11911                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11912                         goto next;
11913                 key.offset = (u64)-1;
11914
11915                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11916                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11917                                         &key);
11918                 else
11919                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11920                 if (IS_ERR(cur_root) || !cur_root) {
11921                         error("failed to read tree: %lld", key.objectid);
11922                         goto next;
11923                 }
11924
11925                 ret = traverse_tree_block(cur_root, cur_root->node);
11926                 err |= ret;
11927
11928                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11929                         btrfs_free_fs_root(cur_root);
11930 next:
11931                 ret = btrfs_next_item(root1, &path);
11932                 if (ret)
11933                         goto out;
11934         }
11935
11936 out:
11937         btrfs_release_path(&path);
11938         return err;
11939 }
11940
11941 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11942 {
11943         int ret;
11944
11945         if (!ctx.progress_enabled)
11946                 fprintf(stderr, "checking extents\n");
11947         if (check_mode == CHECK_MODE_LOWMEM)
11948                 ret = check_chunks_and_extents_v2(fs_info);
11949         else
11950                 ret = check_chunks_and_extents(fs_info);
11951
11952         return ret;
11953 }
11954
11955 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11956                            struct btrfs_root *root, int overwrite)
11957 {
11958         struct extent_buffer *c;
11959         struct extent_buffer *old = root->node;
11960         int level;
11961         int ret;
11962         struct btrfs_disk_key disk_key = {0,0,0};
11963
11964         level = 0;
11965
11966         if (overwrite) {
11967                 c = old;
11968                 extent_buffer_get(c);
11969                 goto init;
11970         }
11971         c = btrfs_alloc_free_block(trans, root,
11972                                    root->fs_info->nodesize,
11973                                    root->root_key.objectid,
11974                                    &disk_key, level, 0, 0);
11975         if (IS_ERR(c)) {
11976                 c = old;
11977                 extent_buffer_get(c);
11978                 overwrite = 1;
11979         }
11980 init:
11981         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11982         btrfs_set_header_level(c, level);
11983         btrfs_set_header_bytenr(c, c->start);
11984         btrfs_set_header_generation(c, trans->transid);
11985         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11986         btrfs_set_header_owner(c, root->root_key.objectid);
11987
11988         write_extent_buffer(c, root->fs_info->fsid,
11989                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11990
11991         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11992                             btrfs_header_chunk_tree_uuid(c),
11993                             BTRFS_UUID_SIZE);
11994
11995         btrfs_mark_buffer_dirty(c);
11996         /*
11997          * this case can happen in the following case:
11998          *
11999          * 1.overwrite previous root.
12000          *
12001          * 2.reinit reloc data root, this is because we skip pin
12002          * down reloc data tree before which means we can allocate
12003          * same block bytenr here.
12004          */
12005         if (old->start == c->start) {
12006                 btrfs_set_root_generation(&root->root_item,
12007                                           trans->transid);
12008                 root->root_item.level = btrfs_header_level(root->node);
12009                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12010                                         &root->root_key, &root->root_item);
12011                 if (ret) {
12012                         free_extent_buffer(c);
12013                         return ret;
12014                 }
12015         }
12016         free_extent_buffer(old);
12017         root->node = c;
12018         add_root_to_dirty_list(root);
12019         return 0;
12020 }
12021
12022 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12023                                 struct extent_buffer *eb, int tree_root)
12024 {
12025         struct extent_buffer *tmp;
12026         struct btrfs_root_item *ri;
12027         struct btrfs_key key;
12028         u64 bytenr;
12029         int level = btrfs_header_level(eb);
12030         int nritems;
12031         int ret;
12032         int i;
12033
12034         /*
12035          * If we have pinned this block before, don't pin it again.
12036          * This can not only avoid forever loop with broken filesystem
12037          * but also give us some speedups.
12038          */
12039         if (test_range_bit(&fs_info->pinned_extents, eb->start,
12040                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12041                 return 0;
12042
12043         btrfs_pin_extent(fs_info, eb->start, eb->len);
12044
12045         nritems = btrfs_header_nritems(eb);
12046         for (i = 0; i < nritems; i++) {
12047                 if (level == 0) {
12048                         btrfs_item_key_to_cpu(eb, &key, i);
12049                         if (key.type != BTRFS_ROOT_ITEM_KEY)
12050                                 continue;
12051                         /* Skip the extent root and reloc roots */
12052                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12053                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12054                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12055                                 continue;
12056                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12057                         bytenr = btrfs_disk_root_bytenr(eb, ri);
12058
12059                         /*
12060                          * If at any point we start needing the real root we
12061                          * will have to build a stump root for the root we are
12062                          * in, but for now this doesn't actually use the root so
12063                          * just pass in extent_root.
12064                          */
12065                         tmp = read_tree_block(fs_info, bytenr, 0);
12066                         if (!extent_buffer_uptodate(tmp)) {
12067                                 fprintf(stderr, "Error reading root block\n");
12068                                 return -EIO;
12069                         }
12070                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
12071                         free_extent_buffer(tmp);
12072                         if (ret)
12073                                 return ret;
12074                 } else {
12075                         bytenr = btrfs_node_blockptr(eb, i);
12076
12077                         /* If we aren't the tree root don't read the block */
12078                         if (level == 1 && !tree_root) {
12079                                 btrfs_pin_extent(fs_info, bytenr,
12080                                                 fs_info->nodesize);
12081                                 continue;
12082                         }
12083
12084                         tmp = read_tree_block(fs_info, bytenr, 0);
12085                         if (!extent_buffer_uptodate(tmp)) {
12086                                 fprintf(stderr, "Error reading tree block\n");
12087                                 return -EIO;
12088                         }
12089                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12090                         free_extent_buffer(tmp);
12091                         if (ret)
12092                                 return ret;
12093                 }
12094         }
12095
12096         return 0;
12097 }
12098
12099 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12100 {
12101         int ret;
12102
12103         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12104         if (ret)
12105                 return ret;
12106
12107         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
12108 }
12109
12110 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12111 {
12112         struct btrfs_block_group_cache *cache;
12113         struct btrfs_path path;
12114         struct extent_buffer *leaf;
12115         struct btrfs_chunk *chunk;
12116         struct btrfs_key key;
12117         int ret;
12118         u64 start;
12119
12120         btrfs_init_path(&path);
12121         key.objectid = 0;
12122         key.type = BTRFS_CHUNK_ITEM_KEY;
12123         key.offset = 0;
12124         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12125         if (ret < 0) {
12126                 btrfs_release_path(&path);
12127                 return ret;
12128         }
12129
12130         /*
12131          * We do this in case the block groups were screwed up and had alloc
12132          * bits that aren't actually set on the chunks.  This happens with
12133          * restored images every time and could happen in real life I guess.
12134          */
12135         fs_info->avail_data_alloc_bits = 0;
12136         fs_info->avail_metadata_alloc_bits = 0;
12137         fs_info->avail_system_alloc_bits = 0;
12138
12139         /* First we need to create the in-memory block groups */
12140         while (1) {
12141                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12142                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12143                         if (ret < 0) {
12144                                 btrfs_release_path(&path);
12145                                 return ret;
12146                         }
12147                         if (ret) {
12148                                 ret = 0;
12149                                 break;
12150                         }
12151                 }
12152                 leaf = path.nodes[0];
12153                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12154                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12155                         path.slots[0]++;
12156                         continue;
12157                 }
12158
12159                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12160                 btrfs_add_block_group(fs_info, 0,
12161                                       btrfs_chunk_type(leaf, chunk),
12162                                       key.objectid, key.offset,
12163                                       btrfs_chunk_length(leaf, chunk));
12164                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12165                                  key.offset + btrfs_chunk_length(leaf, chunk));
12166                 path.slots[0]++;
12167         }
12168         start = 0;
12169         while (1) {
12170                 cache = btrfs_lookup_first_block_group(fs_info, start);
12171                 if (!cache)
12172                         break;
12173                 cache->cached = 1;
12174                 start = cache->key.objectid + cache->key.offset;
12175         }
12176
12177         btrfs_release_path(&path);
12178         return 0;
12179 }
12180
12181 static int reset_balance(struct btrfs_trans_handle *trans,
12182                          struct btrfs_fs_info *fs_info)
12183 {
12184         struct btrfs_root *root = fs_info->tree_root;
12185         struct btrfs_path path;
12186         struct extent_buffer *leaf;
12187         struct btrfs_key key;
12188         int del_slot, del_nr = 0;
12189         int ret;
12190         int found = 0;
12191
12192         btrfs_init_path(&path);
12193         key.objectid = BTRFS_BALANCE_OBJECTID;
12194         key.type = BTRFS_BALANCE_ITEM_KEY;
12195         key.offset = 0;
12196         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12197         if (ret) {
12198                 if (ret > 0)
12199                         ret = 0;
12200                 if (!ret)
12201                         goto reinit_data_reloc;
12202                 else
12203                         goto out;
12204         }
12205
12206         ret = btrfs_del_item(trans, root, &path);
12207         if (ret)
12208                 goto out;
12209         btrfs_release_path(&path);
12210
12211         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12212         key.type = BTRFS_ROOT_ITEM_KEY;
12213         key.offset = 0;
12214         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12215         if (ret < 0)
12216                 goto out;
12217         while (1) {
12218                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12219                         if (!found)
12220                                 break;
12221
12222                         if (del_nr) {
12223                                 ret = btrfs_del_items(trans, root, &path,
12224                                                       del_slot, del_nr);
12225                                 del_nr = 0;
12226                                 if (ret)
12227                                         goto out;
12228                         }
12229                         key.offset++;
12230                         btrfs_release_path(&path);
12231
12232                         found = 0;
12233                         ret = btrfs_search_slot(trans, root, &key, &path,
12234                                                 -1, 1);
12235                         if (ret < 0)
12236                                 goto out;
12237                         continue;
12238                 }
12239                 found = 1;
12240                 leaf = path.nodes[0];
12241                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12242                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12243                         break;
12244                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12245                         path.slots[0]++;
12246                         continue;
12247                 }
12248                 if (!del_nr) {
12249                         del_slot = path.slots[0];
12250                         del_nr = 1;
12251                 } else {
12252                         del_nr++;
12253                 }
12254                 path.slots[0]++;
12255         }
12256
12257         if (del_nr) {
12258                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12259                 if (ret)
12260                         goto out;
12261         }
12262         btrfs_release_path(&path);
12263
12264 reinit_data_reloc:
12265         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12266         key.type = BTRFS_ROOT_ITEM_KEY;
12267         key.offset = (u64)-1;
12268         root = btrfs_read_fs_root(fs_info, &key);
12269         if (IS_ERR(root)) {
12270                 fprintf(stderr, "Error reading data reloc tree\n");
12271                 ret = PTR_ERR(root);
12272                 goto out;
12273         }
12274         record_root_in_trans(trans, root);
12275         ret = btrfs_fsck_reinit_root(trans, root, 0);
12276         if (ret)
12277                 goto out;
12278         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12279 out:
12280         btrfs_release_path(&path);
12281         return ret;
12282 }
12283
12284 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12285                               struct btrfs_fs_info *fs_info)
12286 {
12287         u64 start = 0;
12288         int ret;
12289
12290         /*
12291          * The only reason we don't do this is because right now we're just
12292          * walking the trees we find and pinning down their bytes, we don't look
12293          * at any of the leaves.  In order to do mixed groups we'd have to check
12294          * the leaves of any fs roots and pin down the bytes for any file
12295          * extents we find.  Not hard but why do it if we don't have to?
12296          */
12297         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12298                 fprintf(stderr, "We don't support re-initing the extent tree "
12299                         "for mixed block groups yet, please notify a btrfs "
12300                         "developer you want to do this so they can add this "
12301                         "functionality.\n");
12302                 return -EINVAL;
12303         }
12304
12305         /*
12306          * first we need to walk all of the trees except the extent tree and pin
12307          * down the bytes that are in use so we don't overwrite any existing
12308          * metadata.
12309          */
12310         ret = pin_metadata_blocks(fs_info);
12311         if (ret) {
12312                 fprintf(stderr, "error pinning down used bytes\n");
12313                 return ret;
12314         }
12315
12316         /*
12317          * Need to drop all the block groups since we're going to recreate all
12318          * of them again.
12319          */
12320         btrfs_free_block_groups(fs_info);
12321         ret = reset_block_groups(fs_info);
12322         if (ret) {
12323                 fprintf(stderr, "error resetting the block groups\n");
12324                 return ret;
12325         }
12326
12327         /* Ok we can allocate now, reinit the extent root */
12328         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12329         if (ret) {
12330                 fprintf(stderr, "extent root initialization failed\n");
12331                 /*
12332                  * When the transaction code is updated we should end the
12333                  * transaction, but for now progs only knows about commit so
12334                  * just return an error.
12335                  */
12336                 return ret;
12337         }
12338
12339         /*
12340          * Now we have all the in-memory block groups setup so we can make
12341          * allocations properly, and the metadata we care about is safe since we
12342          * pinned all of it above.
12343          */
12344         while (1) {
12345                 struct btrfs_block_group_cache *cache;
12346
12347                 cache = btrfs_lookup_first_block_group(fs_info, start);
12348                 if (!cache)
12349                         break;
12350                 start = cache->key.objectid + cache->key.offset;
12351                 ret = btrfs_insert_item(trans, fs_info->extent_root,
12352                                         &cache->key, &cache->item,
12353                                         sizeof(cache->item));
12354                 if (ret) {
12355                         fprintf(stderr, "Error adding block group\n");
12356                         return ret;
12357                 }
12358                 btrfs_extent_post_op(trans, fs_info->extent_root);
12359         }
12360
12361         ret = reset_balance(trans, fs_info);
12362         if (ret)
12363                 fprintf(stderr, "error resetting the pending balance\n");
12364
12365         return ret;
12366 }
12367
12368 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12369 {
12370         struct btrfs_path path;
12371         struct btrfs_trans_handle *trans;
12372         struct btrfs_key key;
12373         int ret;
12374
12375         printf("Recowing metadata block %llu\n", eb->start);
12376         key.objectid = btrfs_header_owner(eb);
12377         key.type = BTRFS_ROOT_ITEM_KEY;
12378         key.offset = (u64)-1;
12379
12380         root = btrfs_read_fs_root(root->fs_info, &key);
12381         if (IS_ERR(root)) {
12382                 fprintf(stderr, "Couldn't find owner root %llu\n",
12383                         key.objectid);
12384                 return PTR_ERR(root);
12385         }
12386
12387         trans = btrfs_start_transaction(root, 1);
12388         if (IS_ERR(trans))
12389                 return PTR_ERR(trans);
12390
12391         btrfs_init_path(&path);
12392         path.lowest_level = btrfs_header_level(eb);
12393         if (path.lowest_level)
12394                 btrfs_node_key_to_cpu(eb, &key, 0);
12395         else
12396                 btrfs_item_key_to_cpu(eb, &key, 0);
12397
12398         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12399         btrfs_commit_transaction(trans, root);
12400         btrfs_release_path(&path);
12401         return ret;
12402 }
12403
12404 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12405 {
12406         struct btrfs_path path;
12407         struct btrfs_trans_handle *trans;
12408         struct btrfs_key key;
12409         int ret;
12410
12411         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12412                bad->key.type, bad->key.offset);
12413         key.objectid = bad->root_id;
12414         key.type = BTRFS_ROOT_ITEM_KEY;
12415         key.offset = (u64)-1;
12416
12417         root = btrfs_read_fs_root(root->fs_info, &key);
12418         if (IS_ERR(root)) {
12419                 fprintf(stderr, "Couldn't find owner root %llu\n",
12420                         key.objectid);
12421                 return PTR_ERR(root);
12422         }
12423
12424         trans = btrfs_start_transaction(root, 1);
12425         if (IS_ERR(trans))
12426                 return PTR_ERR(trans);
12427
12428         btrfs_init_path(&path);
12429         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12430         if (ret) {
12431                 if (ret > 0)
12432                         ret = 0;
12433                 goto out;
12434         }
12435         ret = btrfs_del_item(trans, root, &path);
12436 out:
12437         btrfs_commit_transaction(trans, root);
12438         btrfs_release_path(&path);
12439         return ret;
12440 }
12441
12442 static int zero_log_tree(struct btrfs_root *root)
12443 {
12444         struct btrfs_trans_handle *trans;
12445         int ret;
12446
12447         trans = btrfs_start_transaction(root, 1);
12448         if (IS_ERR(trans)) {
12449                 ret = PTR_ERR(trans);
12450                 return ret;
12451         }
12452         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12453         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12454         ret = btrfs_commit_transaction(trans, root);
12455         return ret;
12456 }
12457
12458 static int populate_csum(struct btrfs_trans_handle *trans,
12459                          struct btrfs_root *csum_root, char *buf, u64 start,
12460                          u64 len)
12461 {
12462         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12463         u64 offset = 0;
12464         u64 sectorsize;
12465         int ret = 0;
12466
12467         while (offset < len) {
12468                 sectorsize = fs_info->sectorsize;
12469                 ret = read_extent_data(fs_info, buf, start + offset,
12470                                        &sectorsize, 0);
12471                 if (ret)
12472                         break;
12473                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12474                                             start + offset, buf, sectorsize);
12475                 if (ret)
12476                         break;
12477                 offset += sectorsize;
12478         }
12479         return ret;
12480 }
12481
12482 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12483                                       struct btrfs_root *csum_root,
12484                                       struct btrfs_root *cur_root)
12485 {
12486         struct btrfs_path path;
12487         struct btrfs_key key;
12488         struct extent_buffer *node;
12489         struct btrfs_file_extent_item *fi;
12490         char *buf = NULL;
12491         u64 start = 0;
12492         u64 len = 0;
12493         int slot = 0;
12494         int ret = 0;
12495
12496         buf = malloc(cur_root->fs_info->sectorsize);
12497         if (!buf)
12498                 return -ENOMEM;
12499
12500         btrfs_init_path(&path);
12501         key.objectid = 0;
12502         key.offset = 0;
12503         key.type = 0;
12504         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12505         if (ret < 0)
12506                 goto out;
12507         /* Iterate all regular file extents and fill its csum */
12508         while (1) {
12509                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12510
12511                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12512                         goto next;
12513                 node = path.nodes[0];
12514                 slot = path.slots[0];
12515                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12516                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12517                         goto next;
12518                 start = btrfs_file_extent_disk_bytenr(node, fi);
12519                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12520
12521                 ret = populate_csum(trans, csum_root, buf, start, len);
12522                 if (ret == -EEXIST)
12523                         ret = 0;
12524                 if (ret < 0)
12525                         goto out;
12526 next:
12527                 /*
12528                  * TODO: if next leaf is corrupted, jump to nearest next valid
12529                  * leaf.
12530                  */
12531                 ret = btrfs_next_item(cur_root, &path);
12532                 if (ret < 0)
12533                         goto out;
12534                 if (ret > 0) {
12535                         ret = 0;
12536                         goto out;
12537                 }
12538         }
12539
12540 out:
12541         btrfs_release_path(&path);
12542         free(buf);
12543         return ret;
12544 }
12545
12546 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12547                                   struct btrfs_root *csum_root)
12548 {
12549         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12550         struct btrfs_path path;
12551         struct btrfs_root *tree_root = fs_info->tree_root;
12552         struct btrfs_root *cur_root;
12553         struct extent_buffer *node;
12554         struct btrfs_key key;
12555         int slot = 0;
12556         int ret = 0;
12557
12558         btrfs_init_path(&path);
12559         key.objectid = BTRFS_FS_TREE_OBJECTID;
12560         key.offset = 0;
12561         key.type = BTRFS_ROOT_ITEM_KEY;
12562         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12563         if (ret < 0)
12564                 goto out;
12565         if (ret > 0) {
12566                 ret = -ENOENT;
12567                 goto out;
12568         }
12569
12570         while (1) {
12571                 node = path.nodes[0];
12572                 slot = path.slots[0];
12573                 btrfs_item_key_to_cpu(node, &key, slot);
12574                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12575                         goto out;
12576                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12577                         goto next;
12578                 if (!is_fstree(key.objectid))
12579                         goto next;
12580                 key.offset = (u64)-1;
12581
12582                 cur_root = btrfs_read_fs_root(fs_info, &key);
12583                 if (IS_ERR(cur_root) || !cur_root) {
12584                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12585                                 key.objectid);
12586                         goto out;
12587                 }
12588                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12589                                 cur_root);
12590                 if (ret < 0)
12591                         goto out;
12592 next:
12593                 ret = btrfs_next_item(tree_root, &path);
12594                 if (ret > 0) {
12595                         ret = 0;
12596                         goto out;
12597                 }
12598                 if (ret < 0)
12599                         goto out;
12600         }
12601
12602 out:
12603         btrfs_release_path(&path);
12604         return ret;
12605 }
12606
12607 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12608                                       struct btrfs_root *csum_root)
12609 {
12610         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12611         struct btrfs_path path;
12612         struct btrfs_extent_item *ei;
12613         struct extent_buffer *leaf;
12614         char *buf;
12615         struct btrfs_key key;
12616         int ret;
12617
12618         btrfs_init_path(&path);
12619         key.objectid = 0;
12620         key.type = BTRFS_EXTENT_ITEM_KEY;
12621         key.offset = 0;
12622         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12623         if (ret < 0) {
12624                 btrfs_release_path(&path);
12625                 return ret;
12626         }
12627
12628         buf = malloc(csum_root->fs_info->sectorsize);
12629         if (!buf) {
12630                 btrfs_release_path(&path);
12631                 return -ENOMEM;
12632         }
12633
12634         while (1) {
12635                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12636                         ret = btrfs_next_leaf(extent_root, &path);
12637                         if (ret < 0)
12638                                 break;
12639                         if (ret) {
12640                                 ret = 0;
12641                                 break;
12642                         }
12643                 }
12644                 leaf = path.nodes[0];
12645
12646                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12647                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12648                         path.slots[0]++;
12649                         continue;
12650                 }
12651
12652                 ei = btrfs_item_ptr(leaf, path.slots[0],
12653                                     struct btrfs_extent_item);
12654                 if (!(btrfs_extent_flags(leaf, ei) &
12655                       BTRFS_EXTENT_FLAG_DATA)) {
12656                         path.slots[0]++;
12657                         continue;
12658                 }
12659
12660                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12661                                     key.offset);
12662                 if (ret)
12663                         break;
12664                 path.slots[0]++;
12665         }
12666
12667         btrfs_release_path(&path);
12668         free(buf);
12669         return ret;
12670 }
12671
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * An extent tree init wipes all extent info, so the extent tree cannot be
 * used as the source in that case.  A non-zero @search_fs_tree selects the
 * fs/subvolume trees as the source; zero selects the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
12688
12689 static void free_roots_info_cache(void)
12690 {
12691         if (!roots_info_cache)
12692                 return;
12693
12694         while (!cache_tree_empty(roots_info_cache)) {
12695                 struct cache_extent *entry;
12696                 struct root_item_info *rii;
12697
12698                 entry = first_cache_extent(roots_info_cache);
12699                 if (!entry)
12700                         break;
12701                 remove_cache_extent(roots_info_cache, entry);
12702                 rii = container_of(entry, struct root_item_info, cache_extent);
12703                 free(rii);
12704         }
12705
12706         free(roots_info_cache);
12707         roots_info_cache = NULL;
12708 }
12709
12710 static int build_roots_info_cache(struct btrfs_fs_info *info)
12711 {
12712         int ret = 0;
12713         struct btrfs_key key;
12714         struct extent_buffer *leaf;
12715         struct btrfs_path path;
12716
12717         if (!roots_info_cache) {
12718                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12719                 if (!roots_info_cache)
12720                         return -ENOMEM;
12721                 cache_tree_init(roots_info_cache);
12722         }
12723
12724         btrfs_init_path(&path);
12725         key.objectid = 0;
12726         key.type = BTRFS_EXTENT_ITEM_KEY;
12727         key.offset = 0;
12728         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12729         if (ret < 0)
12730                 goto out;
12731         leaf = path.nodes[0];
12732
12733         while (1) {
12734                 struct btrfs_key found_key;
12735                 struct btrfs_extent_item *ei;
12736                 struct btrfs_extent_inline_ref *iref;
12737                 int slot = path.slots[0];
12738                 int type;
12739                 u64 flags;
12740                 u64 root_id;
12741                 u8 level;
12742                 struct cache_extent *entry;
12743                 struct root_item_info *rii;
12744
12745                 if (slot >= btrfs_header_nritems(leaf)) {
12746                         ret = btrfs_next_leaf(info->extent_root, &path);
12747                         if (ret < 0) {
12748                                 break;
12749                         } else if (ret) {
12750                                 ret = 0;
12751                                 break;
12752                         }
12753                         leaf = path.nodes[0];
12754                         slot = path.slots[0];
12755                 }
12756
12757                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12758
12759                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12760                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12761                         goto next;
12762
12763                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12764                 flags = btrfs_extent_flags(leaf, ei);
12765
12766                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12767                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12768                         goto next;
12769
12770                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12771                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12772                         level = found_key.offset;
12773                 } else {
12774                         struct btrfs_tree_block_info *binfo;
12775
12776                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12777                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12778                         level = btrfs_tree_block_level(leaf, binfo);
12779                 }
12780
12781                 /*
12782                  * For a root extent, it must be of the following type and the
12783                  * first (and only one) iref in the item.
12784                  */
12785                 type = btrfs_extent_inline_ref_type(leaf, iref);
12786                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12787                         goto next;
12788
12789                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12790                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12791                 if (!entry) {
12792                         rii = malloc(sizeof(struct root_item_info));
12793                         if (!rii) {
12794                                 ret = -ENOMEM;
12795                                 goto out;
12796                         }
12797                         rii->cache_extent.start = root_id;
12798                         rii->cache_extent.size = 1;
12799                         rii->level = (u8)-1;
12800                         entry = &rii->cache_extent;
12801                         ret = insert_cache_extent(roots_info_cache, entry);
12802                         ASSERT(ret == 0);
12803                 } else {
12804                         rii = container_of(entry, struct root_item_info,
12805                                            cache_extent);
12806                 }
12807
12808                 ASSERT(rii->cache_extent.start == root_id);
12809                 ASSERT(rii->cache_extent.size == 1);
12810
12811                 if (level > rii->level || rii->level == (u8)-1) {
12812                         rii->level = level;
12813                         rii->bytenr = found_key.objectid;
12814                         rii->gen = btrfs_extent_generation(leaf, ei);
12815                         rii->node_count = 1;
12816                 } else if (level == rii->level) {
12817                         rii->node_count++;
12818                 }
12819 next:
12820                 path.slots[0]++;
12821         }
12822
12823 out:
12824         btrfs_release_path(&path);
12825
12826         return ret;
12827 }
12828
12829 static int maybe_repair_root_item(struct btrfs_path *path,
12830                                   const struct btrfs_key *root_key,
12831                                   const int read_only_mode)
12832 {
12833         const u64 root_id = root_key->objectid;
12834         struct cache_extent *entry;
12835         struct root_item_info *rii;
12836         struct btrfs_root_item ri;
12837         unsigned long offset;
12838
12839         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12840         if (!entry) {
12841                 fprintf(stderr,
12842                         "Error: could not find extent items for root %llu\n",
12843                         root_key->objectid);
12844                 return -ENOENT;
12845         }
12846
12847         rii = container_of(entry, struct root_item_info, cache_extent);
12848         ASSERT(rii->cache_extent.start == root_id);
12849         ASSERT(rii->cache_extent.size == 1);
12850
12851         if (rii->node_count != 1) {
12852                 fprintf(stderr,
12853                         "Error: could not find btree root extent for root %llu\n",
12854                         root_id);
12855                 return -ENOENT;
12856         }
12857
12858         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12859         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12860
12861         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12862             btrfs_root_level(&ri) != rii->level ||
12863             btrfs_root_generation(&ri) != rii->gen) {
12864
12865                 /*
12866                  * If we're in repair mode but our caller told us to not update
12867                  * the root item, i.e. just check if it needs to be updated, don't
12868                  * print this message, since the caller will call us again shortly
12869                  * for the same root item without read only mode (the caller will
12870                  * open a transaction first).
12871                  */
12872                 if (!(read_only_mode && repair))
12873                         fprintf(stderr,
12874                                 "%sroot item for root %llu,"
12875                                 " current bytenr %llu, current gen %llu, current level %u,"
12876                                 " new bytenr %llu, new gen %llu, new level %u\n",
12877                                 (read_only_mode ? "" : "fixing "),
12878                                 root_id,
12879                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12880                                 btrfs_root_level(&ri),
12881                                 rii->bytenr, rii->gen, rii->level);
12882
12883                 if (btrfs_root_generation(&ri) > rii->gen) {
12884                         fprintf(stderr,
12885                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12886                                 root_id, btrfs_root_generation(&ri), rii->gen);
12887                         return -EINVAL;
12888                 }
12889
12890                 if (!read_only_mode) {
12891                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12892                         btrfs_set_root_level(&ri, rii->level);
12893                         btrfs_set_root_generation(&ri, rii->gen);
12894                         write_extent_buffer(path->nodes[0], &ri,
12895                                             offset, sizeof(ri));
12896                 }
12897
12898                 return 1;
12899         }
12900
12901         return 0;
12902 }
12903
12904 /*
12905  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12906  * caused read-only snapshots to be corrupted if they were created at a moment
12907  * when the source subvolume/snapshot had orphan items. The issue was that the
12908  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12909  * node instead of the post orphan cleanup root node.
12910  * So this function, and its callees, just detects and fixes those cases. Even
12911  * though the regression was for read-only snapshots, this function applies to
12912  * any snapshot/subvolume root.
12913  * This must be run before any other repair code - not doing it so, makes other
12914  * repair code delete or modify backrefs in the extent tree for example, which
12915  * will result in an inconsistent fs after repairing the root items.
12916  */
12917 static int repair_root_items(struct btrfs_fs_info *info)
12918 {
12919         struct btrfs_path path;
12920         struct btrfs_key key;
12921         struct extent_buffer *leaf;
12922         struct btrfs_trans_handle *trans = NULL;
12923         int ret = 0;
12924         int bad_roots = 0;
12925         int need_trans = 0;
12926
12927         btrfs_init_path(&path);
12928
12929         ret = build_roots_info_cache(info);
12930         if (ret)
12931                 goto out;
12932
12933         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12934         key.type = BTRFS_ROOT_ITEM_KEY;
12935         key.offset = 0;
12936
12937 again:
12938         /*
12939          * Avoid opening and committing transactions if a leaf doesn't have
12940          * any root items that need to be fixed, so that we avoid rotating
12941          * backup roots unnecessarily.
12942          */
12943         if (need_trans) {
12944                 trans = btrfs_start_transaction(info->tree_root, 1);
12945                 if (IS_ERR(trans)) {
12946                         ret = PTR_ERR(trans);
12947                         goto out;
12948                 }
12949         }
12950
12951         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12952                                 0, trans ? 1 : 0);
12953         if (ret < 0)
12954                 goto out;
12955         leaf = path.nodes[0];
12956
12957         while (1) {
12958                 struct btrfs_key found_key;
12959
12960                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12961                         int no_more_keys = find_next_key(&path, &key);
12962
12963                         btrfs_release_path(&path);
12964                         if (trans) {
12965                                 ret = btrfs_commit_transaction(trans,
12966                                                                info->tree_root);
12967                                 trans = NULL;
12968                                 if (ret < 0)
12969                                         goto out;
12970                         }
12971                         need_trans = 0;
12972                         if (no_more_keys)
12973                                 break;
12974                         goto again;
12975                 }
12976
12977                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12978
12979                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12980                         goto next;
12981                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12982                         goto next;
12983
12984                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12985                 if (ret < 0)
12986                         goto out;
12987                 if (ret) {
12988                         if (!trans && repair) {
12989                                 need_trans = 1;
12990                                 key = found_key;
12991                                 btrfs_release_path(&path);
12992                                 goto again;
12993                         }
12994                         bad_roots++;
12995                 }
12996 next:
12997                 path.slots[0]++;
12998         }
12999         ret = 0;
13000 out:
13001         free_roots_info_cache();
13002         btrfs_release_path(&path);
13003         if (trans)
13004                 btrfs_commit_transaction(trans, info->tree_root);
13005         if (ret < 0)
13006                 return ret;
13007
13008         return bad_roots;
13009 }
13010
13011 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13012 {
13013         struct btrfs_trans_handle *trans;
13014         struct btrfs_block_group_cache *bg_cache;
13015         u64 current = 0;
13016         int ret = 0;
13017
13018         /* Clear all free space cache inodes and its extent data */
13019         while (1) {
13020                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13021                 if (!bg_cache)
13022                         break;
13023                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13024                 if (ret < 0)
13025                         return ret;
13026                 current = bg_cache->key.objectid + bg_cache->key.offset;
13027         }
13028
13029         /* Don't forget to set cache_generation to -1 */
13030         trans = btrfs_start_transaction(fs_info->tree_root, 0);
13031         if (IS_ERR(trans)) {
13032                 error("failed to update super block cache generation");
13033                 return PTR_ERR(trans);
13034         }
13035         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13036         btrfs_commit_transaction(trans, fs_info->tree_root);
13037
13038         return ret;
13039 }
13040
13041 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13042                 int clear_version)
13043 {
13044         int ret = 0;
13045
13046         if (clear_version == 1) {
13047                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13048                         error(
13049                 "free space cache v2 detected, use --clear-space-cache v2");
13050                         ret = 1;
13051                         goto close_out;
13052                 }
13053                 printf("Clearing free space cache\n");
13054                 ret = clear_free_space_cache(fs_info);
13055                 if (ret) {
13056                         error("failed to clear free space cache");
13057                         ret = 1;
13058                 } else {
13059                         printf("Free space cache cleared\n");
13060                 }
13061         } else if (clear_version == 2) {
13062                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13063                         printf("no free space cache v2 to clear\n");
13064                         ret = 0;
13065                         goto close_out;
13066                 }
13067                 printf("Clear free space cache v2\n");
13068                 ret = btrfs_clear_free_space_tree(fs_info);
13069                 if (ret) {
13070                         error("failed to clear free space cache v2: %d", ret);
13071                         ret = 1;
13072                 } else {
13073                         printf("free space cache v2 cleared\n");
13074                 }
13075         }
13076 close_out:
13077         return ret;
13078 }
13079
/*
 * Usage text for "btrfs check": synopsis, one-line summary, long description,
 * an empty separator string and then one entry per option line. The table is
 * terminated by NULL.
 */
const char * const cmd_check_usage[] = {
        /* synopsis and short description */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",

        /* long description */
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",

        /* superblock selection and operating mode */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",

        /* extra checks and reports */
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",

        /* root overrides, progress and cache clearing */
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
13111
13112 int cmd_check(int argc, char **argv)
13113 {
13114         struct cache_tree root_cache;
13115         struct btrfs_root *root;
13116         struct btrfs_fs_info *info;
13117         u64 bytenr = 0;
13118         u64 subvolid = 0;
13119         u64 tree_root_bytenr = 0;
13120         u64 chunk_root_bytenr = 0;
13121         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13122         int ret = 0;
13123         int err = 0;
13124         u64 num;
13125         int init_csum_tree = 0;
13126         int readonly = 0;
13127         int clear_space_cache = 0;
13128         int qgroup_report = 0;
13129         int qgroups_repaired = 0;
13130         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13131         int force = 0;
13132
13133         while(1) {
13134                 int c;
13135                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13136                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13137                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13138                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13139                         GETOPT_VAL_FORCE };
13140                 static const struct option long_options[] = {
13141                         { "super", required_argument, NULL, 's' },
13142                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13143                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13144                         { "init-csum-tree", no_argument, NULL,
13145                                 GETOPT_VAL_INIT_CSUM },
13146                         { "init-extent-tree", no_argument, NULL,
13147                                 GETOPT_VAL_INIT_EXTENT },
13148                         { "check-data-csum", no_argument, NULL,
13149                                 GETOPT_VAL_CHECK_CSUM },
13150                         { "backup", no_argument, NULL, 'b' },
13151                         { "subvol-extents", required_argument, NULL, 'E' },
13152                         { "qgroup-report", no_argument, NULL, 'Q' },
13153                         { "tree-root", required_argument, NULL, 'r' },
13154                         { "chunk-root", required_argument, NULL,
13155                                 GETOPT_VAL_CHUNK_TREE },
13156                         { "progress", no_argument, NULL, 'p' },
13157                         { "mode", required_argument, NULL,
13158                                 GETOPT_VAL_MODE },
13159                         { "clear-space-cache", required_argument, NULL,
13160                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
13161                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13162                         { NULL, 0, NULL, 0}
13163                 };
13164
13165                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13166                 if (c < 0)
13167                         break;
13168                 switch(c) {
13169                         case 'a': /* ignored */ break;
13170                         case 'b':
13171                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13172                                 break;
13173                         case 's':
13174                                 num = arg_strtou64(optarg);
13175                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13176                                         error(
13177                                         "super mirror should be less than %d",
13178                                                 BTRFS_SUPER_MIRROR_MAX);
13179                                         exit(1);
13180                                 }
13181                                 bytenr = btrfs_sb_offset(((int)num));
13182                                 printf("using SB copy %llu, bytenr %llu\n", num,
13183                                        (unsigned long long)bytenr);
13184                                 break;
13185                         case 'Q':
13186                                 qgroup_report = 1;
13187                                 break;
13188                         case 'E':
13189                                 subvolid = arg_strtou64(optarg);
13190                                 break;
13191                         case 'r':
13192                                 tree_root_bytenr = arg_strtou64(optarg);
13193                                 break;
13194                         case GETOPT_VAL_CHUNK_TREE:
13195                                 chunk_root_bytenr = arg_strtou64(optarg);
13196                                 break;
13197                         case 'p':
13198                                 ctx.progress_enabled = true;
13199                                 break;
13200                         case '?':
13201                         case 'h':
13202                                 usage(cmd_check_usage);
13203                         case GETOPT_VAL_REPAIR:
13204                                 printf("enabling repair mode\n");
13205                                 repair = 1;
13206                                 ctree_flags |= OPEN_CTREE_WRITES;
13207                                 break;
13208                         case GETOPT_VAL_READONLY:
13209                                 readonly = 1;
13210                                 break;
13211                         case GETOPT_VAL_INIT_CSUM:
13212                                 printf("Creating a new CRC tree\n");
13213                                 init_csum_tree = 1;
13214                                 repair = 1;
13215                                 ctree_flags |= OPEN_CTREE_WRITES;
13216                                 break;
13217                         case GETOPT_VAL_INIT_EXTENT:
13218                                 init_extent_tree = 1;
13219                                 ctree_flags |= (OPEN_CTREE_WRITES |
13220                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
13221                                 repair = 1;
13222                                 break;
13223                         case GETOPT_VAL_CHECK_CSUM:
13224                                 check_data_csum = 1;
13225                                 break;
13226                         case GETOPT_VAL_MODE:
13227                                 check_mode = parse_check_mode(optarg);
13228                                 if (check_mode == CHECK_MODE_UNKNOWN) {
13229                                         error("unknown mode: %s", optarg);
13230                                         exit(1);
13231                                 }
13232                                 break;
13233                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
13234                                 if (strcmp(optarg, "v1") == 0) {
13235                                         clear_space_cache = 1;
13236                                 } else if (strcmp(optarg, "v2") == 0) {
13237                                         clear_space_cache = 2;
13238                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13239                                 } else {
13240                                         error(
13241                 "invalid argument to --clear-space-cache, must be v1 or v2");
13242                                         exit(1);
13243                                 }
13244                                 ctree_flags |= OPEN_CTREE_WRITES;
13245                                 break;
13246                         case GETOPT_VAL_FORCE:
13247                                 force = 1;
13248                                 break;
13249                 }
13250         }
13251
13252         if (check_argc_exact(argc - optind, 1))
13253                 usage(cmd_check_usage);
13254
13255         if (ctx.progress_enabled) {
13256                 ctx.tp = TASK_NOTHING;
13257                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13258         }
13259
13260         /* This check is the only reason for --readonly to exist */
13261         if (readonly && repair) {
13262                 error("repair options are not compatible with --readonly");
13263                 exit(1);
13264         }
13265
13266         /*
13267          * experimental and dangerous
13268          */
13269         if (repair && check_mode == CHECK_MODE_LOWMEM)
13270                 warning("low-memory mode repair support is only partial");
13271
13272         radix_tree_init();
13273         cache_tree_init(&root_cache);
13274
13275         ret = check_mounted(argv[optind]);
13276         if (!force) {
13277                 if (ret < 0) {
13278                         error("could not check mount status: %s",
13279                                         strerror(-ret));
13280                         err |= !!ret;
13281                         goto err_out;
13282                 } else if (ret) {
13283                         error(
13284 "%s is currently mounted, use --force if you really intend to check the filesystem",
13285                                 argv[optind]);
13286                         ret = -EBUSY;
13287                         err |= !!ret;
13288                         goto err_out;
13289                 }
13290         } else {
13291                 if (repair) {
13292                         error("repair and --force is not yet supported");
13293                         ret = 1;
13294                         err |= !!ret;
13295                         goto err_out;
13296                 }
13297                 if (ret < 0) {
13298                         warning(
13299 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13300                                 argv[optind]);
13301                 } else if (ret) {
13302                         warning(
13303                         "filesystem mounted, continuing because of --force");
13304                 }
13305                 /* A block device is mounted in exclusive mode by kernel */
13306                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13307         }
13308
13309         /* only allow partial opening under repair mode */
13310         if (repair)
13311                 ctree_flags |= OPEN_CTREE_PARTIAL;
13312
13313         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13314                                   chunk_root_bytenr, ctree_flags);
13315         if (!info) {
13316                 error("cannot open file system");
13317                 ret = -EIO;
13318                 err |= !!ret;
13319                 goto err_out;
13320         }
13321
13322         global_info = info;
13323         root = info->fs_root;
13324         uuid_unparse(info->super_copy->fsid, uuidbuf);
13325
13326         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13327
13328         /*
13329          * Check the bare minimum before starting anything else that could rely
13330          * on it, namely the tree roots, any local consistency checks
13331          */
13332         if (!extent_buffer_uptodate(info->tree_root->node) ||
13333             !extent_buffer_uptodate(info->dev_root->node) ||
13334             !extent_buffer_uptodate(info->chunk_root->node)) {
13335                 error("critical roots corrupted, unable to check the filesystem");
13336                 err |= !!ret;
13337                 ret = -EIO;
13338                 goto close_out;
13339         }
13340
13341         if (clear_space_cache) {
13342                 ret = do_clear_free_space_cache(info, clear_space_cache);
13343                 err |= !!ret;
13344                 goto close_out;
13345         }
13346
13347         /*
13348          * repair mode will force us to commit transaction which
13349          * will make us fail to load log tree when mounting.
13350          */
13351         if (repair && btrfs_super_log_root(info->super_copy)) {
13352                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13353                 if (!ret) {
13354                         ret = 1;
13355                         err |= !!ret;
13356                         goto close_out;
13357                 }
13358                 ret = zero_log_tree(root);
13359                 err |= !!ret;
13360                 if (ret) {
13361                         error("failed to zero log tree: %d", ret);
13362                         goto close_out;
13363                 }
13364         }
13365
13366         if (qgroup_report) {
13367                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13368                        uuidbuf);
13369                 ret = qgroup_verify_all(info);
13370                 err |= !!ret;
13371                 if (ret == 0)
13372                         report_qgroups(1);
13373                 goto close_out;
13374         }
13375         if (subvolid) {
13376                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13377                        subvolid, argv[optind], uuidbuf);
13378                 ret = print_extent_state(info, subvolid);
13379                 err |= !!ret;
13380                 goto close_out;
13381         }
13382
13383         if (init_extent_tree || init_csum_tree) {
13384                 struct btrfs_trans_handle *trans;
13385
13386                 trans = btrfs_start_transaction(info->extent_root, 0);
13387                 if (IS_ERR(trans)) {
13388                         error("error starting transaction");
13389                         ret = PTR_ERR(trans);
13390                         err |= !!ret;
13391                         goto close_out;
13392                 }
13393
13394                 if (init_extent_tree) {
13395                         printf("Creating a new extent tree\n");
13396                         ret = reinit_extent_tree(trans, info);
13397                         err |= !!ret;
13398                         if (ret)
13399                                 goto close_out;
13400                 }
13401
13402                 if (init_csum_tree) {
13403                         printf("Reinitialize checksum tree\n");
13404                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13405                         if (ret) {
13406                                 error("checksum tree initialization failed: %d",
13407                                                 ret);
13408                                 ret = -EIO;
13409                                 err |= !!ret;
13410                                 goto close_out;
13411                         }
13412
13413                         ret = fill_csum_tree(trans, info->csum_root,
13414                                              init_extent_tree);
13415                         err |= !!ret;
13416                         if (ret) {
13417                                 error("checksum tree refilling failed: %d", ret);
13418                                 return -EIO;
13419                         }
13420                 }
13421                 /*
13422                  * Ok now we commit and run the normal fsck, which will add
13423                  * extent entries for all of the items it finds.
13424                  */
13425                 ret = btrfs_commit_transaction(trans, info->extent_root);
13426                 err |= !!ret;
13427                 if (ret)
13428                         goto close_out;
13429         }
13430         if (!extent_buffer_uptodate(info->extent_root->node)) {
13431                 error("critical: extent_root, unable to check the filesystem");
13432                 ret = -EIO;
13433                 err |= !!ret;
13434                 goto close_out;
13435         }
13436         if (!extent_buffer_uptodate(info->csum_root->node)) {
13437                 error("critical: csum_root, unable to check the filesystem");
13438                 ret = -EIO;
13439                 err |= !!ret;
13440                 goto close_out;
13441         }
13442
13443         ret = do_check_chunks_and_extents(info);
13444         err |= !!ret;
13445         if (ret)
13446                 error(
13447                 "errors found in extent allocation tree or chunk allocation");
13448
13449         ret = repair_root_items(info);
13450         err |= !!ret;
13451         if (ret < 0) {
13452                 error("failed to repair root items: %s", strerror(-ret));
13453                 goto close_out;
13454         }
13455         if (repair) {
13456                 fprintf(stderr, "Fixed %d roots.\n", ret);
13457                 ret = 0;
13458         } else if (ret > 0) {
13459                 fprintf(stderr,
13460                        "Found %d roots with an outdated root item.\n",
13461                        ret);
13462                 fprintf(stderr,
13463                         "Please run a filesystem check with the option --repair to fix them.\n");
13464                 ret = 1;
13465                 err |= !!ret;
13466                 goto close_out;
13467         }
13468
13469         if (!ctx.progress_enabled) {
13470                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13471                         fprintf(stderr, "checking free space tree\n");
13472                 else
13473                         fprintf(stderr, "checking free space cache\n");
13474         }
13475         ret = check_space_cache(root);
13476         err |= !!ret;
13477         if (ret) {
13478                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13479                         error("errors found in free space tree");
13480                 else
13481                         error("errors found in free space cache");
13482                 goto out;
13483         }
13484
13485         /*
13486          * We used to have to have these hole extents in between our real
13487          * extents so if we don't have this flag set we need to make sure there
13488          * are no gaps in the file extents for inodes, otherwise we can just
13489          * ignore it when this happens.
13490          */
13491         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13492         ret = do_check_fs_roots(info, &root_cache);
13493         err |= !!ret;
13494         if (ret) {
13495                 error("errors found in fs roots");
13496                 goto out;
13497         }
13498
13499         fprintf(stderr, "checking csums\n");
13500         ret = check_csums(root);
13501         err |= !!ret;
13502         if (ret) {
13503                 error("errors found in csum tree");
13504                 goto out;
13505         }
13506
13507         fprintf(stderr, "checking root refs\n");
13508         /* For low memory mode, check_fs_roots_v2 handles root refs */
13509         if (check_mode != CHECK_MODE_LOWMEM) {
13510                 ret = check_root_refs(root, &root_cache);
13511                 err |= !!ret;
13512                 if (ret) {
13513                         error("errors found in root refs");
13514                         goto out;
13515                 }
13516         }
13517
13518         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13519                 struct extent_buffer *eb;
13520
13521                 eb = list_first_entry(&root->fs_info->recow_ebs,
13522                                       struct extent_buffer, recow);
13523                 list_del_init(&eb->recow);
13524                 ret = recow_extent_buffer(root, eb);
13525                 err |= !!ret;
13526                 if (ret) {
13527                         error("fails to fix transid errors");
13528                         break;
13529                 }
13530         }
13531
13532         while (!list_empty(&delete_items)) {
13533                 struct bad_item *bad;
13534
13535                 bad = list_first_entry(&delete_items, struct bad_item, list);
13536                 list_del_init(&bad->list);
13537                 if (repair) {
13538                         ret = delete_bad_item(root, bad);
13539                         err |= !!ret;
13540                 }
13541                 free(bad);
13542         }
13543
13544         if (info->quota_enabled) {
13545                 fprintf(stderr, "checking quota groups\n");
13546                 ret = qgroup_verify_all(info);
13547                 err |= !!ret;
13548                 if (ret) {
13549                         error("failed to check quota groups");
13550                         goto out;
13551                 }
13552                 report_qgroups(0);
13553                 ret = repair_qgroups(info, &qgroups_repaired);
13554                 err |= !!ret;
13555                 if (err) {
13556                         error("failed to repair quota groups");
13557                         goto out;
13558                 }
13559                 ret = 0;
13560         }
13561
13562         if (!list_empty(&root->fs_info->recow_ebs)) {
13563                 error("transid errors in file system");
13564                 ret = 1;
13565                 err |= !!ret;
13566         }
13567 out:
13568         printf("found %llu bytes used, ",
13569                (unsigned long long)bytes_used);
13570         if (err)
13571                 printf("error(s) found\n");
13572         else
13573                 printf("no error found\n");
13574         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13575         printf("total tree bytes: %llu\n",
13576                (unsigned long long)total_btree_bytes);
13577         printf("total fs tree bytes: %llu\n",
13578                (unsigned long long)total_fs_tree_bytes);
13579         printf("total extent tree bytes: %llu\n",
13580                (unsigned long long)total_extent_tree_bytes);
13581         printf("btree space waste bytes: %llu\n",
13582                (unsigned long long)btree_space_waste);
13583         printf("file data blocks allocated: %llu\n referenced %llu\n",
13584                 (unsigned long long)data_bytes_allocated,
13585                 (unsigned long long)data_bytes_referenced);
13586
13587         free_qgroup_counts();
13588         free_root_recs_tree(&root_cache);
13589 close_out:
13590         close_ctree(root);
13591 err_out:
13592         if (ctx.progress_enabled)
13593                 task_deinit(ctx.info);
13594
13595         return err;
13596 }