btrfs-progs: update CHANGES for v4.13.3
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase of the check that the progress indicator reports on.  Used as the
 * index into the message table in print_status_check(), which has no entry
 * for TASK_NOTHING.
 */
47 enum task_position {
48         TASK_EXTENTS,
49         TASK_FREE_SPACE,
50         TASK_FS_ROOTS,
51         TASK_NOTHING, /* must stay the last element */
52 };
53
/* Context passed (as void *) to the progress callback print_status_check(). */
54 struct task_ctx {
55         int progress_enabled;
           /* phase currently reported; TASK_NOTHING prints nothing */
56         enum task_position tp;
57
           /* handle given to task_period_start()/task_period_wait() */
58         struct task_info *info;
59 };
60
/*
 * File-scope state shared by the checker.
 * NOTE(review): the use sites are outside this part of the file; going by
 * the names, the u64 values are byte totals gathered during the scan and
 * the ints are option flags -- confirm before relying on this.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementation selected by the user; parse_check_mode() maps the
 * strings "lowmem", "orig" and "original" onto these values and returns
 * CHECK_MODE_UNKNOWN for anything else.
 */
78 enum btrfs_check_mode {
79         CHECK_MODE_ORIGINAL,
80         CHECK_MODE_LOWMEM,
81         CHECK_MODE_UNKNOWN,
82         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
83 };
84
/* Mode in effect; starts out as the original-mode default. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header of a back reference.  It is embedded as the member `node`
 * of both struct data_backref and struct tree_backref; is_data tells
 * compare_extent_backref() which of the two the header belongs to.
 */
87 struct extent_backref {
           /* linkage for an rb-tree ordered by compare_extent_backref() */
88         struct rb_node node;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
/*
 * Back reference to a data extent.  Ordered by compare_data_backref():
 * parent/root first, then (unless node.full_backref is set) owner and
 * offset, and finally disk_bytenr and bytes when both sides have found_ref.
 */
101 struct data_backref {
102         struct extent_backref node;
            /*
             * parent and root share storage.
             * NOTE(review): presumably parent is the valid member when
             * node.full_backref is set and root otherwise -- confirm.
             */
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
/*
 * Error bits, one bit per kind of problem; bit 0 is not assigned.
 * NOTE(review): the users of these bits are outside this part of the
 * file -- confirm which checker reports them.
 */
116 #define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but does not match */
119 #define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but does not match */
122 #define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM             (1<<13) /* INODE_ITEM has no reference */
129 #define NO_INODE_ITEM           (1<<14) /* no inode_item */
130 #define LAST_ITEM               (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but does not match */
133
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
135 {
136         return container_of(back, struct data_backref, node);
137 }
138
139 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
140 {
141         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
142         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
143         struct data_backref *back1 = to_data_backref(ext1);
144         struct data_backref *back2 = to_data_backref(ext2);
145
146         WARN_ON(!ext1->is_data);
147         WARN_ON(!ext2->is_data);
148
149         /* parent and root are a union, so this covers both */
150         if (back1->parent > back2->parent)
151                 return 1;
152         if (back1->parent < back2->parent)
153                 return -1;
154
155         /* This is a full backref and the parents match. */
156         if (back1->node.full_backref)
157                 return 0;
158
159         if (back1->owner > back2->owner)
160                 return 1;
161         if (back1->owner < back2->owner)
162                 return -1;
163
164         if (back1->offset > back2->offset)
165                 return 1;
166         if (back1->offset < back2->offset)
167                 return -1;
168
169         if (back1->found_ref && back2->found_ref) {
170                 if (back1->disk_bytenr > back2->disk_bytenr)
171                         return 1;
172                 if (back1->disk_bytenr < back2->disk_bytenr)
173                         return -1;
174
175                 if (back1->bytes > back2->bytes)
176                         return 1;
177                 if (back1->bytes < back2->bytes)
178                         return -1;
179         }
180
181         return 0;
182 }
183
184 /*
185  * Much like data_backref, but with the undetermined members removed
186  * and linked through a list_head instead of an rb_node.
187  * During the extent scan it is stored in root->orphan_data_extent.
188  * During the fs tree scan it is moved to inode_rec->orphan_data_extents.
189  */
190 struct orphan_data_extent {
191         struct list_head list;
192         u64 root;
            /* printed as "inode" by print_orphan_data_extents() */
193         u64 objectid;
194         u64 offset;
195         u64 disk_bytenr;
196         u64 disk_len;
197 };
198
/*
 * Back reference to a tree block.  compare_tree_backref() orders these by
 * the parent/root value only.
 */
199 struct tree_backref {
200         struct extent_backref node;
            /* parent and root share storage */
201         union {
202                 u64 parent;
203                 u64 root;
204         };
205 };
206
207 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
208 {
209         return container_of(back, struct tree_backref, node);
210 }
211
212 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
213 {
214         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
215         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
216         struct tree_backref *back1 = to_tree_backref(ext1);
217         struct tree_backref *back2 = to_tree_backref(ext2);
218
219         WARN_ON(ext1->is_data);
220         WARN_ON(ext2->is_data);
221
222         /* parent and root are a union, so this covers both */
223         if (back1->parent > back2->parent)
224                 return 1;
225         if (back1->parent < back2->parent)
226                 return -1;
227
228         return 0;
229 }
230
231 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
232 {
233         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
234         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
235
236         if (ext1->is_data > ext2->is_data)
237                 return 1;
238
239         if (ext1->is_data < ext2->is_data)
240                 return -1;
241
242         if (ext1->full_backref > ext2->full_backref)
243                 return 1;
244         if (ext1->full_backref < ext2->full_backref)
245                 return -1;
246
247         if (ext1->is_data)
248                 return compare_data_backref(node1, node2);
249         else
250                 return compare_tree_backref(node1, node2);
251 }
252
253 /*
     * Explicit "not determined yet" value for
     * extent_record::flag_block_full_backref; that field is two bits wide,
     * so it can hold 2 in addition to 0 and 1.
     */
254 enum { FLAG_UNSET = 2 };
255
/*
 * Per-extent bookkeeping record.
 * NOTE(review): the code that fills and consumes this record is outside
 * this part of the file; the field notes below are limited to what the
 * declarations themselves show.
 */
256 struct extent_record {
257         struct list_head backrefs;
258         struct list_head dups;
            /* NOTE(review): presumably ordered by compare_extent_backref() -- confirm */
259         struct rb_root backref_tree;
            /* linkage used by to_extent_record() */
260         struct list_head list;
261         struct cache_extent cache;
262         struct btrfs_disk_key parent_key;
263         u64 start;
264         u64 max_size;
265         u64 nr;
266         u64 refs;
267         u64 extent_item_refs;
268         u64 generation;
269         u64 parent_generation;
270         u64 info_objectid;
271         u32 num_duplicates;
272         u8 info_level;
            /* two bits wide so that it can also hold FLAG_UNSET (2) */
273         unsigned int flag_block_full_backref:2;
274         unsigned int found_rec:1;
275         unsigned int content_checked:1;
276         unsigned int owner_ref_checked:1;
277         unsigned int is_root:1;
278         unsigned int metadata:1;
279         unsigned int bad_full_backref:1;
280         unsigned int crossing_stripes:1;
281         unsigned int wrong_chunk_type:1;
282 };
283
284 static inline struct extent_record* to_extent_record(struct list_head *entry)
285 {
286         return container_of(entry, struct extent_record, list);
287 }
288
/*
 * One name referencing an inode; kept on inode_record::backrefs.
 * The name bytes are stored directly behind the structure:
 * clone_inode_rec() copies sizeof(struct inode_backref) + namelen + 1 bytes.
 */
289 struct inode_backref {
290         struct list_head list;
291         unsigned int found_dir_item:1;
292         unsigned int found_dir_index:1;
293         unsigned int found_inode_ref:1;
294         u8 filetype;
295         u8 ref_type;
            /* NOTE(review): presumably a REF_ERR_* bitmask -- confirm */
296         int errors;
297         u64 dir;
298         u64 index;
299         u16 namelen;
300         char name[0];
301 };
302
303 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
304 {
305         return list_entry(entry, struct inode_backref, list);
306 }
307
/*
 * List-linked record about one root item.
 * NOTE(review): producers and consumers are outside this part of the file;
 * the fields look like copies of the corresponding root item values --
 * confirm.
 */
308 struct root_item_record {
309         struct list_head list;
310         u64 objectid;
311         u64 bytenr;
312         u64 last_snapshot;
313         u8 level;
314         u8 drop_level;
315         struct btrfs_key drop_key;
316 };
317
/* Backref error bits; print_ref_error() turns them into text. */
318 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
319 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
320 #define REF_ERR_NO_INODE_REF            (1 << 2)
321 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
322 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
323 #define REF_ERR_DUP_INODE_REF           (1 << 5)
324 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
325 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
326 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
327 #define REF_ERR_NO_ROOT_REF             (1 << 9)
328 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
329 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
330 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
331
/*
 * One hole, covering [start, start + len), in the rb-tree kept in
 * inode_record::holes.  The tree is ordered by compare_hole().
 */
332 struct file_extent_hole {
333         struct rb_node node;
334         u64 start;
335         u64 len;
336 };
337
/*
 * Everything collected about one inode.  clone_inode_rec() makes a deep
 * copy (backrefs, orphan extents and holes included) with refs set to 1.
 */
338 struct inode_record {
            /* list of struct inode_backref */
339         struct list_head backrefs;
340         unsigned int checked:1;
341         unsigned int merging:1;
342         unsigned int found_inode_item:1;
343         unsigned int found_dir_item:1;
344         unsigned int found_file_extent:1;
345         unsigned int found_csum_item:1;
346         unsigned int some_csum_missing:1;
347         unsigned int nodatasum:1;
            /* I_ERR_* bitmask, decoded by print_inode_error() */
348         int errors;
349
350         u64 ino;
351         u32 nlink;
352         u32 imode;
353         u64 isize;
354         u64 nbytes;
355
356         u32 found_link;
357         u64 found_size;
358         u64 extent_start;
359         u64 extent_end;
            /* rb-tree of struct file_extent_hole */
360         struct rb_root holes;
            /* list of struct orphan_data_extent */
361         struct list_head orphan_extents;
362
363         u32 refs;
364 };
365
/*
 * Bits of inode_record::errors.  print_inode_error() prints the mask in
 * hex followed by one text fragment per set bit.
 */
366 #define I_ERR_NO_INODE_ITEM             (1 << 0)
367 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
368 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
369 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
370 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
371 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
372 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
373 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
374 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
375 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
376 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
377 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
378 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
379 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
380 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
381
/*
 * One reference to a root, linked through `list`.
 * NOTE(review): presumably kept on root_record::backrefs, with the name
 * bytes stored behind the structure as for struct inode_backref -- the
 * allocation site is outside this part of the file, confirm there.
 */
382 struct root_backref {
383         struct list_head list;
384         unsigned int found_dir_item:1;
385         unsigned int found_dir_index:1;
386         unsigned int found_back_ref:1;
387         unsigned int found_forward_ref:1;
388         unsigned int reachable:1;
389         int errors;
390         u64 ref_root;
391         u64 dir;
392         u64 index;
393         u16 namelen;
394         char name[0];
395 };
396
397 static inline struct root_backref* to_root_backref(struct list_head *entry)
398 {
399         return list_entry(entry, struct root_backref, list);
400 }
401
/*
 * Per-root record.
 * NOTE(review): `cache` suggests the record lives in a cache_tree and
 * `backrefs` suggests a list of struct root_backref -- both users are
 * outside this part of the file, confirm.
 */
402 struct root_record {
403         struct list_head backrefs;
404         struct cache_extent cache;
405         unsigned int found_root_item:1;
406         u64 objectid;
407         u32 found_ref;
408 };
409
/*
 * A cache_extent paired with an untyped payload pointer.
 * NOTE(review): what `data` points to is decided by code outside this
 * part of the file.
 */
410 struct ptr_node {
411         struct cache_extent cache;
412         void *data;
413 };
414
/*
 * Node referenced from walk_control::nodes[]; carries its own root and
 * inode cache trees plus the inode record currently being worked on.
 * NOTE(review): lifetime and the meaning of `refs` are established outside
 * this part of the file.
 */
415 struct shared_node {
416         struct cache_extent cache;
417         struct cache_tree root_cache;
418         struct cache_tree inode_cache;
419         struct inode_record *current;
420         u32 refs;
421 };
422
/* Start and size of one block.  NOTE(review): presumably both in bytes -- confirm. */
423 struct block_info {
424         u64 start;
425         u32 size;
426 };
427
/*
 * Tree-walk state with one shared_node slot per possible tree level.
 * NOTE(review): presumably active_node and root_level index nodes[] --
 * the walking code is outside this part of the file, confirm there.
 */
428 struct walk_control {
429         struct cache_tree shared;
430         struct shared_node *nodes[BTRFS_MAX_LEVEL];
431         int active_node;
432         int root_level;
433 };
434
/*
 * Key of an item together with the id of the root it belongs to, linked
 * through `list`.
 * NOTE(review): presumably queued on the file-scope delete_items list --
 * confirm at the use site.
 */
435 struct bad_item {
436         struct btrfs_key key;
437         u64 root_id;
438         struct list_head list;
439 };
440
/*
 * List-linked (bytenr, bytes) candidate with counters.
 * NOTE(review): how `count` and `broken` are accumulated is decided by
 * code outside this part of the file.
 */
441 struct extent_entry {
442         u64 bytenr;
443         u64 bytes;
444         int count;
445         int broken;
446         struct list_head list;
447 };
448
/*
 * Information gathered about the root node of one tree.
 * NOTE(review): presumably stored in roots_info_cache through
 * cache_extent -- confirm at the use site.
 */
449 struct root_item_info {
450         /* level of the root */
451         u8 level;
452         /* number of nodes at this level, must be 1 for a root */
453         int node_count;
454         u64 bytenr;
455         u64 gen;
456         struct cache_extent cache_extent;
457 };
458
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally for error classification.  Every class has its own bit:
 * CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with REFERENCER_MISMATCH,
 * which made the two classes indistinguishable, so it now uses the first
 * free bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
475
476 static void *print_status_check(void *p)
477 {
478         struct task_ctx *priv = p;
479         const char work_indicator[] = { '.', 'o', 'O', 'o' };
480         uint32_t count = 0;
481         static char *task_position_string[] = {
482                 "checking extents",
483                 "checking free space cache",
484                 "checking fs roots",
485         };
486
487         task_period_start(priv->info, 1000 /* 1s */);
488
489         if (priv->tp == TASK_NOTHING)
490                 return NULL;
491
492         while (1) {
493                 printf("%s [%c]\r", task_position_string[priv->tp],
494                                 work_indicator[count % 4]);
495                 count++;
496                 fflush(stdout);
497                 task_period_wait(priv->info);
498         }
499         return NULL;
500 }
501
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	fputc('\n', stdout);
	fflush(stdout);

	return 0;
}
509
510 static enum btrfs_check_mode parse_check_mode(const char *str)
511 {
512         if (strcmp(str, "lowmem") == 0)
513                 return CHECK_MODE_LOWMEM;
514         if (strcmp(str, "orig") == 0)
515                 return CHECK_MODE_ORIGINAL;
516         if (strcmp(str, "original") == 0)
517                 return CHECK_MODE_ORIGINAL;
518
519         return CHECK_MODE_UNKNOWN;
520 }
521
522 /* Compatible function to allow reuse of old codes */
523 static u64 first_extent_gap(struct rb_root *holes)
524 {
525         struct file_extent_hole *hole;
526
527         if (RB_EMPTY_ROOT(holes))
528                 return (u64)-1;
529
530         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
531         return hole->start;
532 }
533
534 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
535 {
536         struct file_extent_hole *hole1;
537         struct file_extent_hole *hole2;
538
539         hole1 = rb_entry(node1, struct file_extent_hole, node);
540         hole2 = rb_entry(node2, struct file_extent_hole, node);
541
542         if (hole1->start > hole2->start)
543                 return -1;
544         if (hole1->start < hole2->start)
545                 return 1;
546         /* Now hole1->start == hole2->start */
547         if (hole1->len >= hole2->len)
548                 /*
549                  * Hole 1 will be merge center
550                  * Same hole will be merged later
551                  */
552                 return -1;
553         /* Hole 2 will be merge center */
554         return 1;
555 }
556
557 /*
558  * Add a hole to the record
559  *
560  * This will do hole merge for copy_file_extent_holes(),
561  * which will ensure there won't be continuous holes.
562  */
563 static int add_file_extent_hole(struct rb_root *holes,
564                                 u64 start, u64 len)
565 {
566         struct file_extent_hole *hole;
567         struct file_extent_hole *prev = NULL;
568         struct file_extent_hole *next = NULL;
569
570         hole = malloc(sizeof(*hole));
571         if (!hole)
572                 return -ENOMEM;
573         hole->start = start;
574         hole->len = len;
575         /* Since compare will not return 0, no -EEXIST will happen */
576         rb_insert(holes, &hole->node, compare_hole);
577
578         /* simple merge with previous hole */
579         if (rb_prev(&hole->node))
580                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
581                                 node);
582         if (prev && prev->start + prev->len >= hole->start) {
583                 hole->len = hole->start + hole->len - prev->start;
584                 hole->start = prev->start;
585                 rb_erase(&prev->node, holes);
586                 free(prev);
587                 prev = NULL;
588         }
589
590         /* iterate merge with next holes */
591         while (1) {
592                 if (!rb_next(&hole->node))
593                         break;
594                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
595                                         node);
596                 if (hole->start + hole->len >= next->start) {
597                         if (hole->start + hole->len <= next->start + next->len)
598                                 hole->len = next->start + next->len -
599                                             hole->start;
600                         rb_erase(&next->node, holes);
601                         free(next);
602                         next = NULL;
603                 } else
604                         break;
605         }
606         return 0;
607 }
608
609 static int compare_hole_range(struct rb_node *node, void *data)
610 {
611         struct file_extent_hole *hole;
612         u64 start;
613
614         hole = (struct file_extent_hole *)data;
615         start = hole->start;
616
617         hole = rb_entry(node, struct file_extent_hole, node);
618         if (start < hole->start)
619                 return -1;
620         if (start >= hole->start && start < hole->start + hole->len)
621                 return 0;
622         return 1;
623 }
624
625 /*
626  * Delete a hole in the record
627  *
628  * This will do the hole split and is much restrict than add.
629  */
630 static int del_file_extent_hole(struct rb_root *holes,
631                                 u64 start, u64 len)
632 {
633         struct file_extent_hole *hole;
634         struct file_extent_hole tmp;
635         u64 prev_start = 0;
636         u64 prev_len = 0;
637         u64 next_start = 0;
638         u64 next_len = 0;
639         struct rb_node *node;
640         int have_prev = 0;
641         int have_next = 0;
642         int ret = 0;
643
644         tmp.start = start;
645         tmp.len = len;
646         node = rb_search(holes, &tmp, compare_hole_range, NULL);
647         if (!node)
648                 return -EEXIST;
649         hole = rb_entry(node, struct file_extent_hole, node);
650         if (start + len > hole->start + hole->len)
651                 return -EEXIST;
652
653         /*
654          * Now there will be no overlap, delete the hole and re-add the
655          * split(s) if they exists.
656          */
657         if (start > hole->start) {
658                 prev_start = hole->start;
659                 prev_len = start - hole->start;
660                 have_prev = 1;
661         }
662         if (hole->start + hole->len > start + len) {
663                 next_start = start + len;
664                 next_len = hole->start + hole->len - start - len;
665                 have_next = 1;
666         }
667         rb_erase(node, holes);
668         free(hole);
669         if (have_prev) {
670                 ret = add_file_extent_hole(holes, prev_start, prev_len);
671                 if (ret < 0)
672                         return ret;
673         }
674         if (have_next) {
675                 ret = add_file_extent_hole(holes, next_start, next_len);
676                 if (ret < 0)
677                         return ret;
678         }
679         return 0;
680 }
681
682 static int copy_file_extent_holes(struct rb_root *dst,
683                                   struct rb_root *src)
684 {
685         struct file_extent_hole *hole;
686         struct rb_node *node;
687         int ret = 0;
688
689         node = rb_first(src);
690         while (node) {
691                 hole = rb_entry(node, struct file_extent_hole, node);
692                 ret = add_file_extent_hole(dst, hole->start, hole->len);
693                 if (ret)
694                         break;
695                 node = rb_next(node);
696         }
697         return ret;
698 }
699
700 static void free_file_extent_holes(struct rb_root *holes)
701 {
702         struct rb_node *node;
703         struct file_extent_hole *hole;
704
705         node = rb_first(holes);
706         while (node) {
707                 hole = rb_entry(node, struct file_extent_hole, node);
708                 rb_erase(node, holes);
709                 free(hole);
710                 node = rb_first(holes);
711         }
712 }
713
714 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
715
716 static void record_root_in_trans(struct btrfs_trans_handle *trans,
717                                  struct btrfs_root *root)
718 {
719         if (root->last_trans != trans->transid) {
720                 root->track_dirty = 1;
721                 root->last_trans = trans->transid;
722                 root->commit_root = root->node;
723                 extent_buffer_get(root->node);
724         }
725 }
726
727 static u8 imode_to_type(u32 imode)
728 {
729 #define S_SHIFT 12
730         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
731                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
732                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
733                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
734                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
735                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
736                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
737                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
738         };
739
740         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
741 #undef S_SHIFT
742 }
743
744 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
745 {
746         struct device_record *rec1;
747         struct device_record *rec2;
748
749         rec1 = rb_entry(node1, struct device_record, node);
750         rec2 = rb_entry(node2, struct device_record, node);
751         if (rec1->devid > rec2->devid)
752                 return -1;
753         else if (rec1->devid < rec2->devid)
754                 return 1;
755         else
756                 return 0;
757 }
758
759 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
760 {
761         struct inode_record *rec;
762         struct inode_backref *backref;
763         struct inode_backref *orig;
764         struct inode_backref *tmp;
765         struct orphan_data_extent *src_orphan;
766         struct orphan_data_extent *dst_orphan;
767         struct rb_node *rb;
768         size_t size;
769         int ret;
770
771         rec = malloc(sizeof(*rec));
772         if (!rec)
773                 return ERR_PTR(-ENOMEM);
774         memcpy(rec, orig_rec, sizeof(*rec));
775         rec->refs = 1;
776         INIT_LIST_HEAD(&rec->backrefs);
777         INIT_LIST_HEAD(&rec->orphan_extents);
778         rec->holes = RB_ROOT;
779
780         list_for_each_entry(orig, &orig_rec->backrefs, list) {
781                 size = sizeof(*orig) + orig->namelen + 1;
782                 backref = malloc(size);
783                 if (!backref) {
784                         ret = -ENOMEM;
785                         goto cleanup;
786                 }
787                 memcpy(backref, orig, size);
788                 list_add_tail(&backref->list, &rec->backrefs);
789         }
790         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
791                 dst_orphan = malloc(sizeof(*dst_orphan));
792                 if (!dst_orphan) {
793                         ret = -ENOMEM;
794                         goto cleanup;
795                 }
796                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
797                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
798         }
799         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
800         if (ret < 0)
801                 goto cleanup_rb;
802
803         return rec;
804
805 cleanup_rb:
806         rb = rb_first(&rec->holes);
807         while (rb) {
808                 struct file_extent_hole *hole;
809
810                 hole = rb_entry(rb, struct file_extent_hole, node);
811                 rb = rb_next(rb);
812                 free(hole);
813         }
814
815 cleanup:
816         if (!list_empty(&rec->backrefs))
817                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
818                         list_del(&orig->list);
819                         free(orig);
820                 }
821
822         if (!list_empty(&rec->orphan_extents))
823                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
824                         list_del(&orig->list);
825                         free(orig);
826                 }
827
828         free(rec);
829
830         return ERR_PTR(ret);
831 }
832
833 static void print_orphan_data_extents(struct list_head *orphan_extents,
834                                       u64 objectid)
835 {
836         struct orphan_data_extent *orphan;
837
838         if (list_empty(orphan_extents))
839                 return;
840         printf("The following data extent is lost in tree %llu:\n",
841                objectid);
842         list_for_each_entry(orphan, orphan_extents, list) {
843                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
844                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
845                        orphan->disk_len);
846         }
847 }
848
849 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
850 {
851         u64 root_objectid = root->root_key.objectid;
852         int errors = rec->errors;
853
854         if (!errors)
855                 return;
856         /* reloc root errors, we print its corresponding fs root objectid*/
857         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
858                 root_objectid = root->root_key.offset;
859                 fprintf(stderr, "reloc");
860         }
861         fprintf(stderr, "root %llu inode %llu errors %x",
862                 (unsigned long long) root_objectid,
863                 (unsigned long long) rec->ino, rec->errors);
864
865         if (errors & I_ERR_NO_INODE_ITEM)
866                 fprintf(stderr, ", no inode item");
867         if (errors & I_ERR_NO_ORPHAN_ITEM)
868                 fprintf(stderr, ", no orphan item");
869         if (errors & I_ERR_DUP_INODE_ITEM)
870                 fprintf(stderr, ", dup inode item");
871         if (errors & I_ERR_DUP_DIR_INDEX)
872                 fprintf(stderr, ", dup dir index");
873         if (errors & I_ERR_ODD_DIR_ITEM)
874                 fprintf(stderr, ", odd dir item");
875         if (errors & I_ERR_ODD_FILE_EXTENT)
876                 fprintf(stderr, ", odd file extent");
877         if (errors & I_ERR_BAD_FILE_EXTENT)
878                 fprintf(stderr, ", bad file extent");
879         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
880                 fprintf(stderr, ", file extent overlap");
881         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
882                 fprintf(stderr, ", file extent discount");
883         if (errors & I_ERR_DIR_ISIZE_WRONG)
884                 fprintf(stderr, ", dir isize wrong");
885         if (errors & I_ERR_FILE_NBYTES_WRONG)
886                 fprintf(stderr, ", nbytes wrong");
887         if (errors & I_ERR_ODD_CSUM_ITEM)
888                 fprintf(stderr, ", odd csum item");
889         if (errors & I_ERR_SOME_CSUM_MISSING)
890                 fprintf(stderr, ", some csum missing");
891         if (errors & I_ERR_LINK_COUNT_WRONG)
892                 fprintf(stderr, ", link count wrong");
893         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
894                 fprintf(stderr, ", orphan file extent");
895         fprintf(stderr, "\n");
896         /* Print the orphan extents if needed */
897         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
899
900         /* Print the holes if needed */
901         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
902                 struct file_extent_hole *hole;
903                 struct rb_node *node;
904                 int found = 0;
905
906                 node = rb_first(&rec->holes);
907                 fprintf(stderr, "Found file extent holes:\n");
908                 while (node) {
909                         found = 1;
910                         hole = rb_entry(node, struct file_extent_hole, node);
911                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
912                                 hole->start, hole->len);
913                         node = rb_next(node);
914                 }
915                 if (!found)
916                         fprintf(stderr, "\tstart: 0, len: %llu\n",
917                                 round_up(rec->isize,
918                                          root->fs_info->sectorsize));
919         }
920 }
921
922 static void print_ref_error(int errors)
923 {
924         if (errors & REF_ERR_NO_DIR_ITEM)
925                 fprintf(stderr, ", no dir item");
926         if (errors & REF_ERR_NO_DIR_INDEX)
927                 fprintf(stderr, ", no dir index");
928         if (errors & REF_ERR_NO_INODE_REF)
929                 fprintf(stderr, ", no inode ref");
930         if (errors & REF_ERR_DUP_DIR_ITEM)
931                 fprintf(stderr, ", dup dir item");
932         if (errors & REF_ERR_DUP_DIR_INDEX)
933                 fprintf(stderr, ", dup dir index");
934         if (errors & REF_ERR_DUP_INODE_REF)
935                 fprintf(stderr, ", dup inode ref");
936         if (errors & REF_ERR_INDEX_UNMATCH)
937                 fprintf(stderr, ", index mismatch");
938         if (errors & REF_ERR_FILETYPE_UNMATCH)
939                 fprintf(stderr, ", filetype mismatch");
940         if (errors & REF_ERR_NAME_TOO_LONG)
941                 fprintf(stderr, ", name too long");
942         if (errors & REF_ERR_NO_ROOT_REF)
943                 fprintf(stderr, ", no root ref");
944         if (errors & REF_ERR_NO_ROOT_BACKREF)
945                 fprintf(stderr, ", no root backref");
946         if (errors & REF_ERR_DUP_ROOT_REF)
947                 fprintf(stderr, ", dup root ref");
948         if (errors & REF_ERR_DUP_ROOT_BACKREF)
949                 fprintf(stderr, ", dup root backref");
950         fprintf(stderr, "\n");
951 }
952
953 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
954                                           u64 ino, int mod)
955 {
956         struct ptr_node *node;
957         struct cache_extent *cache;
958         struct inode_record *rec = NULL;
959         int ret;
960
961         cache = lookup_cache_extent(inode_cache, ino, 1);
962         if (cache) {
963                 node = container_of(cache, struct ptr_node, cache);
964                 rec = node->data;
965                 if (mod && rec->refs > 1) {
966                         node->data = clone_inode_rec(rec);
967                         if (IS_ERR(node->data))
968                                 return node->data;
969                         rec->refs--;
970                         rec = node->data;
971                 }
972         } else if (mod) {
973                 rec = calloc(1, sizeof(*rec));
974                 if (!rec)
975                         return ERR_PTR(-ENOMEM);
976                 rec->ino = ino;
977                 rec->extent_start = (u64)-1;
978                 rec->refs = 1;
979                 INIT_LIST_HEAD(&rec->backrefs);
980                 INIT_LIST_HEAD(&rec->orphan_extents);
981                 rec->holes = RB_ROOT;
982
983                 node = malloc(sizeof(*node));
984                 if (!node) {
985                         free(rec);
986                         return ERR_PTR(-ENOMEM);
987                 }
988                 node->cache.start = ino;
989                 node->cache.size = 1;
990                 node->data = rec;
991
992                 if (ino == BTRFS_FREE_INO_OBJECTID)
993                         rec->found_link = 1;
994
995                 ret = insert_cache_extent(inode_cache, &node->cache);
996                 if (ret)
997                         return ERR_PTR(-EEXIST);
998         }
999         return rec;
1000 }
1001
1002 static void free_orphan_data_extents(struct list_head *orphan_extents)
1003 {
1004         struct orphan_data_extent *orphan;
1005
1006         while (!list_empty(orphan_extents)) {
1007                 orphan = list_entry(orphan_extents->next,
1008                                     struct orphan_data_extent, list);
1009                 list_del(&orphan->list);
1010                 free(orphan);
1011         }
1012 }
1013
1014 static void free_inode_rec(struct inode_record *rec)
1015 {
1016         struct inode_backref *backref;
1017
1018         if (--rec->refs > 0)
1019                 return;
1020
1021         while (!list_empty(&rec->backrefs)) {
1022                 backref = to_inode_backref(rec->backrefs.next);
1023                 list_del(&backref->list);
1024                 free(backref);
1025         }
1026         free_orphan_data_extents(&rec->orphan_extents);
1027         free_file_extent_holes(&rec->holes);
1028         free(rec);
1029 }
1030
1031 static int can_free_inode_rec(struct inode_record *rec)
1032 {
1033         if (!rec->errors && rec->checked && rec->found_inode_item &&
1034             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1035                 return 1;
1036         return 0;
1037 }
1038
1039 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1040                                  struct inode_record *rec)
1041 {
1042         struct cache_extent *cache;
1043         struct inode_backref *tmp, *backref;
1044         struct ptr_node *node;
1045         u8 filetype;
1046
1047         if (!rec->found_inode_item)
1048                 return;
1049
1050         filetype = imode_to_type(rec->imode);
1051         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1052                 if (backref->found_dir_item && backref->found_dir_index) {
1053                         if (backref->filetype != filetype)
1054                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1055                         if (!backref->errors && backref->found_inode_ref &&
1056                             rec->nlink == rec->found_link) {
1057                                 list_del(&backref->list);
1058                                 free(backref);
1059                         }
1060                 }
1061         }
1062
1063         if (!rec->checked || rec->merging)
1064                 return;
1065
1066         if (S_ISDIR(rec->imode)) {
1067                 if (rec->found_size != rec->isize)
1068                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1069                 if (rec->found_file_extent)
1070                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1071         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1072                 if (rec->found_dir_item)
1073                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1074                 if (rec->found_size != rec->nbytes)
1075                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1076                 if (rec->nlink > 0 && !no_holes &&
1077                     (rec->extent_end < rec->isize ||
1078                      first_extent_gap(&rec->holes) < rec->isize))
1079                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1080         }
1081
1082         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1083                 if (rec->found_csum_item && rec->nodatasum)
1084                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1085                 if (rec->some_csum_missing && !rec->nodatasum)
1086                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1087         }
1088
1089         BUG_ON(rec->refs != 1);
1090         if (can_free_inode_rec(rec)) {
1091                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1092                 node = container_of(cache, struct ptr_node, cache);
1093                 BUG_ON(node->data != rec);
1094                 remove_cache_extent(inode_cache, &node->cache);
1095                 free(node);
1096                 free_inode_rec(rec);
1097         }
1098 }
1099
1100 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1101 {
1102         struct btrfs_path path;
1103         struct btrfs_key key;
1104         int ret;
1105
1106         key.objectid = BTRFS_ORPHAN_OBJECTID;
1107         key.type = BTRFS_ORPHAN_ITEM_KEY;
1108         key.offset = ino;
1109
1110         btrfs_init_path(&path);
1111         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1112         btrfs_release_path(&path);
1113         if (ret > 0)
1114                 ret = -ENOENT;
1115         return ret;
1116 }
1117
1118 static int process_inode_item(struct extent_buffer *eb,
1119                               int slot, struct btrfs_key *key,
1120                               struct shared_node *active_node)
1121 {
1122         struct inode_record *rec;
1123         struct btrfs_inode_item *item;
1124
1125         rec = active_node->current;
1126         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1127         if (rec->found_inode_item) {
1128                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1129                 return 1;
1130         }
1131         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1132         rec->nlink = btrfs_inode_nlink(eb, item);
1133         rec->isize = btrfs_inode_size(eb, item);
1134         rec->nbytes = btrfs_inode_nbytes(eb, item);
1135         rec->imode = btrfs_inode_mode(eb, item);
1136         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1137                 rec->nodatasum = 1;
1138         rec->found_inode_item = 1;
1139         if (rec->nlink == 0)
1140                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1141         maybe_free_inode_rec(&active_node->inode_cache, rec);
1142         return 0;
1143 }
1144
1145 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1146                                                 const char *name,
1147                                                 int namelen, u64 dir)
1148 {
1149         struct inode_backref *backref;
1150
1151         list_for_each_entry(backref, &rec->backrefs, list) {
1152                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1153                         break;
1154                 if (backref->dir != dir || backref->namelen != namelen)
1155                         continue;
1156                 if (memcmp(name, backref->name, namelen))
1157                         continue;
1158                 return backref;
1159         }
1160
1161         backref = malloc(sizeof(*backref) + namelen + 1);
1162         if (!backref)
1163                 return NULL;
1164         memset(backref, 0, sizeof(*backref));
1165         backref->dir = dir;
1166         backref->namelen = namelen;
1167         memcpy(backref->name, name, namelen);
1168         backref->name[namelen] = '\0';
1169         list_add_tail(&backref->list, &rec->backrefs);
1170         return backref;
1171 }
1172
1173 static int add_inode_backref(struct cache_tree *inode_cache,
1174                              u64 ino, u64 dir, u64 index,
1175                              const char *name, int namelen,
1176                              u8 filetype, u8 itemtype, int errors)
1177 {
1178         struct inode_record *rec;
1179         struct inode_backref *backref;
1180
1181         rec = get_inode_rec(inode_cache, ino, 1);
1182         BUG_ON(IS_ERR(rec));
1183         backref = get_inode_backref(rec, name, namelen, dir);
1184         BUG_ON(!backref);
1185         if (errors)
1186                 backref->errors |= errors;
1187         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1188                 if (backref->found_dir_index)
1189                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1190                 if (backref->found_inode_ref && backref->index != index)
1191                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1192                 if (backref->found_dir_item && backref->filetype != filetype)
1193                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1194
1195                 backref->index = index;
1196                 backref->filetype = filetype;
1197                 backref->found_dir_index = 1;
1198         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1199                 rec->found_link++;
1200                 if (backref->found_dir_item)
1201                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1202                 if (backref->found_dir_index && backref->filetype != filetype)
1203                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1204
1205                 backref->filetype = filetype;
1206                 backref->found_dir_item = 1;
1207         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1208                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1209                 if (backref->found_inode_ref)
1210                         backref->errors |= REF_ERR_DUP_INODE_REF;
1211                 if (backref->found_dir_index && backref->index != index)
1212                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1213                 else
1214                         backref->index = index;
1215
1216                 backref->ref_type = itemtype;
1217                 backref->found_inode_ref = 1;
1218         } else {
1219                 BUG_ON(1);
1220         }
1221
1222         maybe_free_inode_rec(inode_cache, rec);
1223         return 0;
1224 }
1225
1226 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1227                             struct cache_tree *dst_cache)
1228 {
1229         struct inode_backref *backref;
1230         u32 dir_count = 0;
1231         int ret = 0;
1232
1233         dst->merging = 1;
1234         list_for_each_entry(backref, &src->backrefs, list) {
1235                 if (backref->found_dir_index) {
1236                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1237                                         backref->index, backref->name,
1238                                         backref->namelen, backref->filetype,
1239                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1240                 }
1241                 if (backref->found_dir_item) {
1242                         dir_count++;
1243                         add_inode_backref(dst_cache, dst->ino,
1244                                         backref->dir, 0, backref->name,
1245                                         backref->namelen, backref->filetype,
1246                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1247                 }
1248                 if (backref->found_inode_ref) {
1249                         add_inode_backref(dst_cache, dst->ino,
1250                                         backref->dir, backref->index,
1251                                         backref->name, backref->namelen, 0,
1252                                         backref->ref_type, backref->errors);
1253                 }
1254         }
1255
1256         if (src->found_dir_item)
1257                 dst->found_dir_item = 1;
1258         if (src->found_file_extent)
1259                 dst->found_file_extent = 1;
1260         if (src->found_csum_item)
1261                 dst->found_csum_item = 1;
1262         if (src->some_csum_missing)
1263                 dst->some_csum_missing = 1;
1264         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1265                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1266                 if (ret < 0)
1267                         return ret;
1268         }
1269
1270         BUG_ON(src->found_link < dir_count);
1271         dst->found_link += src->found_link - dir_count;
1272         dst->found_size += src->found_size;
1273         if (src->extent_start != (u64)-1) {
1274                 if (dst->extent_start == (u64)-1) {
1275                         dst->extent_start = src->extent_start;
1276                         dst->extent_end = src->extent_end;
1277                 } else {
1278                         if (dst->extent_end > src->extent_start)
1279                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1280                         else if (dst->extent_end < src->extent_start) {
1281                                 ret = add_file_extent_hole(&dst->holes,
1282                                         dst->extent_end,
1283                                         src->extent_start - dst->extent_end);
1284                         }
1285                         if (dst->extent_end < src->extent_end)
1286                                 dst->extent_end = src->extent_end;
1287                 }
1288         }
1289
1290         dst->errors |= src->errors;
1291         if (src->found_inode_item) {
1292                 if (!dst->found_inode_item) {
1293                         dst->nlink = src->nlink;
1294                         dst->isize = src->isize;
1295                         dst->nbytes = src->nbytes;
1296                         dst->imode = src->imode;
1297                         dst->nodatasum = src->nodatasum;
1298                         dst->found_inode_item = 1;
1299                 } else {
1300                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1301                 }
1302         }
1303         dst->merging = 0;
1304
1305         return 0;
1306 }
1307
1308 static int splice_shared_node(struct shared_node *src_node,
1309                               struct shared_node *dst_node)
1310 {
1311         struct cache_extent *cache;
1312         struct ptr_node *node, *ins;
1313         struct cache_tree *src, *dst;
1314         struct inode_record *rec, *conflict;
1315         u64 current_ino = 0;
1316         int splice = 0;
1317         int ret;
1318
1319         if (--src_node->refs == 0)
1320                 splice = 1;
1321         if (src_node->current)
1322                 current_ino = src_node->current->ino;
1323
1324         src = &src_node->root_cache;
1325         dst = &dst_node->root_cache;
1326 again:
1327         cache = search_cache_extent(src, 0);
1328         while (cache) {
1329                 node = container_of(cache, struct ptr_node, cache);
1330                 rec = node->data;
1331                 cache = next_cache_extent(cache);
1332
1333                 if (splice) {
1334                         remove_cache_extent(src, &node->cache);
1335                         ins = node;
1336                 } else {
1337                         ins = malloc(sizeof(*ins));
1338                         BUG_ON(!ins);
1339                         ins->cache.start = node->cache.start;
1340                         ins->cache.size = node->cache.size;
1341                         ins->data = rec;
1342                         rec->refs++;
1343                 }
1344                 ret = insert_cache_extent(dst, &ins->cache);
1345                 if (ret == -EEXIST) {
1346                         conflict = get_inode_rec(dst, rec->ino, 1);
1347                         BUG_ON(IS_ERR(conflict));
1348                         merge_inode_recs(rec, conflict, dst);
1349                         if (rec->checked) {
1350                                 conflict->checked = 1;
1351                                 if (dst_node->current == conflict)
1352                                         dst_node->current = NULL;
1353                         }
1354                         maybe_free_inode_rec(dst, conflict);
1355                         free_inode_rec(rec);
1356                         free(ins);
1357                 } else {
1358                         BUG_ON(ret);
1359                 }
1360         }
1361
1362         if (src == &src_node->root_cache) {
1363                 src = &src_node->inode_cache;
1364                 dst = &dst_node->inode_cache;
1365                 goto again;
1366         }
1367
1368         if (current_ino > 0 && (!dst_node->current ||
1369             current_ino > dst_node->current->ino)) {
1370                 if (dst_node->current) {
1371                         dst_node->current->checked = 1;
1372                         maybe_free_inode_rec(dst, dst_node->current);
1373                 }
1374                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1375                 BUG_ON(IS_ERR(dst_node->current));
1376         }
1377         return 0;
1378 }
1379
1380 static void free_inode_ptr(struct cache_extent *cache)
1381 {
1382         struct ptr_node *node;
1383         struct inode_record *rec;
1384
1385         node = container_of(cache, struct ptr_node, cache);
1386         rec = node->data;
1387         free_inode_rec(rec);
1388         free(node);
1389 }
1390
1391 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1392
1393 static struct shared_node *find_shared_node(struct cache_tree *shared,
1394                                             u64 bytenr)
1395 {
1396         struct cache_extent *cache;
1397         struct shared_node *node;
1398
1399         cache = lookup_cache_extent(shared, bytenr, 1);
1400         if (cache) {
1401                 node = container_of(cache, struct shared_node, cache);
1402                 return node;
1403         }
1404         return NULL;
1405 }
1406
1407 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1408 {
1409         int ret;
1410         struct shared_node *node;
1411
1412         node = calloc(1, sizeof(*node));
1413         if (!node)
1414                 return -ENOMEM;
1415         node->cache.start = bytenr;
1416         node->cache.size = 1;
1417         cache_tree_init(&node->root_cache);
1418         cache_tree_init(&node->inode_cache);
1419         node->refs = refs;
1420
1421         ret = insert_cache_extent(shared, &node->cache);
1422
1423         return ret;
1424 }
1425
1426 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1427                              struct walk_control *wc, int level)
1428 {
1429         struct shared_node *node;
1430         struct shared_node *dest;
1431         int ret;
1432
1433         if (level == wc->active_node)
1434                 return 0;
1435
1436         BUG_ON(wc->active_node <= level);
1437         node = find_shared_node(&wc->shared, bytenr);
1438         if (!node) {
1439                 ret = add_shared_node(&wc->shared, bytenr, refs);
1440                 BUG_ON(ret);
1441                 node = find_shared_node(&wc->shared, bytenr);
1442                 wc->nodes[level] = node;
1443                 wc->active_node = level;
1444                 return 0;
1445         }
1446
1447         if (wc->root_level == wc->active_node &&
1448             btrfs_root_refs(&root->root_item) == 0) {
1449                 if (--node->refs == 0) {
1450                         free_inode_recs_tree(&node->root_cache);
1451                         free_inode_recs_tree(&node->inode_cache);
1452                         remove_cache_extent(&wc->shared, &node->cache);
1453                         free(node);
1454                 }
1455                 return 1;
1456         }
1457
1458         dest = wc->nodes[wc->active_node];
1459         splice_shared_node(node, dest);
1460         if (node->refs == 0) {
1461                 remove_cache_extent(&wc->shared, &node->cache);
1462                 free(node);
1463         }
1464         return 1;
1465 }
1466
1467 static int leave_shared_node(struct btrfs_root *root,
1468                              struct walk_control *wc, int level)
1469 {
1470         struct shared_node *node;
1471         struct shared_node *dest;
1472         int i;
1473
1474         if (level == wc->root_level)
1475                 return 0;
1476
1477         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1478                 if (wc->nodes[i])
1479                         break;
1480         }
1481         BUG_ON(i >= BTRFS_MAX_LEVEL);
1482
1483         node = wc->nodes[wc->active_node];
1484         wc->nodes[wc->active_node] = NULL;
1485         wc->active_node = i;
1486
1487         dest = wc->nodes[wc->active_node];
1488         if (wc->active_node < wc->root_level ||
1489             btrfs_root_refs(&root->root_item) > 0) {
1490                 BUG_ON(node->refs <= 1);
1491                 splice_shared_node(node, dest);
1492         } else {
1493                 BUG_ON(node->refs < 2);
1494                 node->refs--;
1495         }
1496         return 0;
1497 }
1498
1499 /*
1500  * Returns:
1501  * < 0 - on error
1502  * 1   - if the root with id child_root_id is a child of root parent_root_id
1503  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1504  *       has other root(s) as parent(s)
1505  * 2   - if the root child_root_id doesn't have any parent roots
1506  */
1507 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1508                          u64 child_root_id)
1509 {
1510         struct btrfs_path path;
1511         struct btrfs_key key;
1512         struct extent_buffer *leaf;
1513         int has_parent = 0;
1514         int ret;
1515
1516         btrfs_init_path(&path);
1517
1518         key.objectid = parent_root_id;
1519         key.type = BTRFS_ROOT_REF_KEY;
1520         key.offset = child_root_id;
1521         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1522                                 0, 0);
1523         if (ret < 0)
1524                 return ret;
1525         btrfs_release_path(&path);
1526         if (!ret)
1527                 return 1;
1528
1529         key.objectid = child_root_id;
1530         key.type = BTRFS_ROOT_BACKREF_KEY;
1531         key.offset = 0;
1532         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1533                                 0, 0);
1534         if (ret < 0)
1535                 goto out;
1536
1537         while (1) {
1538                 leaf = path.nodes[0];
1539                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1540                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1541                         if (ret)
1542                                 break;
1543                         leaf = path.nodes[0];
1544                 }
1545
1546                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1547                 if (key.objectid != child_root_id ||
1548                     key.type != BTRFS_ROOT_BACKREF_KEY)
1549                         break;
1550
1551                 has_parent = 1;
1552
1553                 if (key.offset == parent_root_id) {
1554                         btrfs_release_path(&path);
1555                         return 1;
1556                 }
1557
1558                 path.slots[0]++;
1559         }
1560 out:
1561         btrfs_release_path(&path);
1562         if (ret < 0)
1563                 return ret;
1564         return has_parent ? 0 : 2;
1565 }
1566
1567 static int process_dir_item(struct extent_buffer *eb,
1568                             int slot, struct btrfs_key *key,
1569                             struct shared_node *active_node)
1570 {
1571         u32 total;
1572         u32 cur = 0;
1573         u32 len;
1574         u32 name_len;
1575         u32 data_len;
1576         int error;
1577         int nritems = 0;
1578         u8 filetype;
1579         struct btrfs_dir_item *di;
1580         struct inode_record *rec;
1581         struct cache_tree *root_cache;
1582         struct cache_tree *inode_cache;
1583         struct btrfs_key location;
1584         char namebuf[BTRFS_NAME_LEN];
1585
1586         root_cache = &active_node->root_cache;
1587         inode_cache = &active_node->inode_cache;
1588         rec = active_node->current;
1589         rec->found_dir_item = 1;
1590
1591         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1592         total = btrfs_item_size_nr(eb, slot);
1593         while (cur < total) {
1594                 nritems++;
1595                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1596                 name_len = btrfs_dir_name_len(eb, di);
1597                 data_len = btrfs_dir_data_len(eb, di);
1598                 filetype = btrfs_dir_type(eb, di);
1599
1600                 rec->found_size += name_len;
1601                 if (cur + sizeof(*di) + name_len > total ||
1602                     name_len > BTRFS_NAME_LEN) {
1603                         error = REF_ERR_NAME_TOO_LONG;
1604
1605                         if (cur + sizeof(*di) > total)
1606                                 break;
1607                         len = min_t(u32, total - cur - sizeof(*di),
1608                                     BTRFS_NAME_LEN);
1609                 } else {
1610                         len = name_len;
1611                         error = 0;
1612                 }
1613
1614                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1615
1616                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1617                     key->offset != btrfs_name_hash(namebuf, len)) {
1618                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1619                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1620                         key->objectid, key->offset, namebuf, len, filetype,
1621                         key->offset, btrfs_name_hash(namebuf, len));
1622                 }
1623
1624                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1625                         add_inode_backref(inode_cache, location.objectid,
1626                                           key->objectid, key->offset, namebuf,
1627                                           len, filetype, key->type, error);
1628                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1629                         add_inode_backref(root_cache, location.objectid,
1630                                           key->objectid, key->offset,
1631                                           namebuf, len, filetype,
1632                                           key->type, error);
1633                 } else {
1634                         fprintf(stderr, "invalid location in dir item %u\n",
1635                                 location.type);
1636                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1637                                           key->objectid, key->offset, namebuf,
1638                                           len, filetype, key->type, error);
1639                 }
1640
1641                 len = sizeof(*di) + name_len + data_len;
1642                 di = (struct btrfs_dir_item *)((char *)di + len);
1643                 cur += len;
1644         }
1645         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1646                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1647
1648         return 0;
1649 }
1650
1651 static int process_inode_ref(struct extent_buffer *eb,
1652                              int slot, struct btrfs_key *key,
1653                              struct shared_node *active_node)
1654 {
1655         u32 total;
1656         u32 cur = 0;
1657         u32 len;
1658         u32 name_len;
1659         u64 index;
1660         int error;
1661         struct cache_tree *inode_cache;
1662         struct btrfs_inode_ref *ref;
1663         char namebuf[BTRFS_NAME_LEN];
1664
1665         inode_cache = &active_node->inode_cache;
1666
1667         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1668         total = btrfs_item_size_nr(eb, slot);
1669         while (cur < total) {
1670                 name_len = btrfs_inode_ref_name_len(eb, ref);
1671                 index = btrfs_inode_ref_index(eb, ref);
1672
1673                 /* inode_ref + namelen should not cross item boundary */
1674                 if (cur + sizeof(*ref) + name_len > total ||
1675                     name_len > BTRFS_NAME_LEN) {
1676                         if (total < cur + sizeof(*ref))
1677                                 break;
1678
1679                         /* Still try to read out the remaining part */
1680                         len = min_t(u32, total - cur - sizeof(*ref),
1681                                     BTRFS_NAME_LEN);
1682                         error = REF_ERR_NAME_TOO_LONG;
1683                 } else {
1684                         len = name_len;
1685                         error = 0;
1686                 }
1687
1688                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1689                 add_inode_backref(inode_cache, key->objectid, key->offset,
1690                                   index, namebuf, len, 0, key->type, error);
1691
1692                 len = sizeof(*ref) + name_len;
1693                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1694                 cur += len;
1695         }
1696         return 0;
1697 }
1698
1699 static int process_inode_extref(struct extent_buffer *eb,
1700                                 int slot, struct btrfs_key *key,
1701                                 struct shared_node *active_node)
1702 {
1703         u32 total;
1704         u32 cur = 0;
1705         u32 len;
1706         u32 name_len;
1707         u64 index;
1708         u64 parent;
1709         int error;
1710         struct cache_tree *inode_cache;
1711         struct btrfs_inode_extref *extref;
1712         char namebuf[BTRFS_NAME_LEN];
1713
1714         inode_cache = &active_node->inode_cache;
1715
1716         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1717         total = btrfs_item_size_nr(eb, slot);
1718         while (cur < total) {
1719                 name_len = btrfs_inode_extref_name_len(eb, extref);
1720                 index = btrfs_inode_extref_index(eb, extref);
1721                 parent = btrfs_inode_extref_parent(eb, extref);
1722                 if (name_len <= BTRFS_NAME_LEN) {
1723                         len = name_len;
1724                         error = 0;
1725                 } else {
1726                         len = BTRFS_NAME_LEN;
1727                         error = REF_ERR_NAME_TOO_LONG;
1728                 }
1729                 read_extent_buffer(eb, namebuf,
1730                                    (unsigned long)(extref + 1), len);
1731                 add_inode_backref(inode_cache, key->objectid, parent,
1732                                   index, namebuf, len, 0, key->type, error);
1733
1734                 len = sizeof(*extref) + name_len;
1735                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1736                 cur += len;
1737         }
1738         return 0;
1739
1740 }
1741
1742 static int count_csum_range(struct btrfs_root *root, u64 start,
1743                             u64 len, u64 *found)
1744 {
1745         struct btrfs_key key;
1746         struct btrfs_path path;
1747         struct extent_buffer *leaf;
1748         int ret;
1749         size_t size;
1750         *found = 0;
1751         u64 csum_end;
1752         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1753
1754         btrfs_init_path(&path);
1755
1756         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1757         key.offset = start;
1758         key.type = BTRFS_EXTENT_CSUM_KEY;
1759
1760         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1761                                 &key, &path, 0, 0);
1762         if (ret < 0)
1763                 goto out;
1764         if (ret > 0 && path.slots[0] > 0) {
1765                 leaf = path.nodes[0];
1766                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1767                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1768                     key.type == BTRFS_EXTENT_CSUM_KEY)
1769                         path.slots[0]--;
1770         }
1771
1772         while (len > 0) {
1773                 leaf = path.nodes[0];
1774                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1775                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1776                         if (ret > 0)
1777                                 break;
1778                         else if (ret < 0)
1779                                 goto out;
1780                         leaf = path.nodes[0];
1781                 }
1782
1783                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1784                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1785                     key.type != BTRFS_EXTENT_CSUM_KEY)
1786                         break;
1787
1788                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1789                 if (key.offset >= start + len)
1790                         break;
1791
1792                 if (key.offset > start)
1793                         start = key.offset;
1794
1795                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1796                 csum_end = key.offset + (size / csum_size) *
1797                            root->fs_info->sectorsize;
1798                 if (csum_end > start) {
1799                         size = min(csum_end - start, len);
1800                         len -= size;
1801                         start += size;
1802                         *found += size;
1803                 }
1804
1805                 path.slots[0]++;
1806         }
1807 out:
1808         btrfs_release_path(&path);
1809         if (ret < 0)
1810                 return ret;
1811         return 0;
1812 }
1813
1814 static int process_file_extent(struct btrfs_root *root,
1815                                 struct extent_buffer *eb,
1816                                 int slot, struct btrfs_key *key,
1817                                 struct shared_node *active_node)
1818 {
1819         struct inode_record *rec;
1820         struct btrfs_file_extent_item *fi;
1821         u64 num_bytes = 0;
1822         u64 disk_bytenr = 0;
1823         u64 extent_offset = 0;
1824         u64 mask = root->fs_info->sectorsize - 1;
1825         int extent_type;
1826         int ret;
1827
1828         rec = active_node->current;
1829         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1830         rec->found_file_extent = 1;
1831
1832         if (rec->extent_start == (u64)-1) {
1833                 rec->extent_start = key->offset;
1834                 rec->extent_end = key->offset;
1835         }
1836
1837         if (rec->extent_end > key->offset)
1838                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1839         else if (rec->extent_end < key->offset) {
1840                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1841                                            key->offset - rec->extent_end);
1842                 if (ret < 0)
1843                         return ret;
1844         }
1845
1846         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1847         extent_type = btrfs_file_extent_type(eb, fi);
1848
1849         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1850                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1851                 if (num_bytes == 0)
1852                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1853                 rec->found_size += num_bytes;
1854                 num_bytes = (num_bytes + mask) & ~mask;
1855         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1856                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1857                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1858                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1859                 extent_offset = btrfs_file_extent_offset(eb, fi);
1860                 if (num_bytes == 0 || (num_bytes & mask))
1861                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1862                 if (num_bytes + extent_offset >
1863                     btrfs_file_extent_ram_bytes(eb, fi))
1864                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1865                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1866                     (btrfs_file_extent_compression(eb, fi) ||
1867                      btrfs_file_extent_encryption(eb, fi) ||
1868                      btrfs_file_extent_other_encoding(eb, fi)))
1869                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1870                 if (disk_bytenr > 0)
1871                         rec->found_size += num_bytes;
1872         } else {
1873                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1874         }
1875         rec->extent_end = key->offset + num_bytes;
1876
1877         /*
1878          * The data reloc tree will copy full extents into its inode and then
1879          * copy the corresponding csums.  Because the extent it copied could be
1880          * a preallocated extent that hasn't been written to yet there may be no
1881          * csums to copy, ergo we won't have csums for our file extent.  This is
1882          * ok so just don't bother checking csums if the inode belongs to the
1883          * data reloc tree.
1884          */
1885         if (disk_bytenr > 0 &&
1886             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1887                 u64 found;
1888                 if (btrfs_file_extent_compression(eb, fi))
1889                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1890                 else
1891                         disk_bytenr += extent_offset;
1892
1893                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1894                 if (ret < 0)
1895                         return ret;
1896                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1897                         if (found > 0)
1898                                 rec->found_csum_item = 1;
1899                         if (found < num_bytes)
1900                                 rec->some_csum_missing = 1;
1901                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1902                         if (found > 0)
1903                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1904                 }
1905         }
1906         return 0;
1907 }
1908
1909 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1910                             struct walk_control *wc)
1911 {
1912         struct btrfs_key key;
1913         u32 nritems;
1914         int i;
1915         int ret = 0;
1916         struct cache_tree *inode_cache;
1917         struct shared_node *active_node;
1918
1919         if (wc->root_level == wc->active_node &&
1920             btrfs_root_refs(&root->root_item) == 0)
1921                 return 0;
1922
1923         active_node = wc->nodes[wc->active_node];
1924         inode_cache = &active_node->inode_cache;
1925         nritems = btrfs_header_nritems(eb);
1926         for (i = 0; i < nritems; i++) {
1927                 btrfs_item_key_to_cpu(eb, &key, i);
1928
1929                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1930                         continue;
1931                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1932                         continue;
1933
1934                 if (active_node->current == NULL ||
1935                     active_node->current->ino < key.objectid) {
1936                         if (active_node->current) {
1937                                 active_node->current->checked = 1;
1938                                 maybe_free_inode_rec(inode_cache,
1939                                                      active_node->current);
1940                         }
1941                         active_node->current = get_inode_rec(inode_cache,
1942                                                              key.objectid, 1);
1943                         BUG_ON(IS_ERR(active_node->current));
1944                 }
1945                 switch (key.type) {
1946                 case BTRFS_DIR_ITEM_KEY:
1947                 case BTRFS_DIR_INDEX_KEY:
1948                         ret = process_dir_item(eb, i, &key, active_node);
1949                         break;
1950                 case BTRFS_INODE_REF_KEY:
1951                         ret = process_inode_ref(eb, i, &key, active_node);
1952                         break;
1953                 case BTRFS_INODE_EXTREF_KEY:
1954                         ret = process_inode_extref(eb, i, &key, active_node);
1955                         break;
1956                 case BTRFS_INODE_ITEM_KEY:
1957                         ret = process_inode_item(eb, i, &key, active_node);
1958                         break;
1959                 case BTRFS_EXTENT_DATA_KEY:
1960                         ret = process_file_extent(root, eb, i, &key,
1961                                                   active_node);
1962                         break;
1963                 default:
1964                         break;
1965                 };
1966         }
1967         return ret;
1968 }
1969
1970 struct node_refs {
1971         u64 bytenr[BTRFS_MAX_LEVEL];
1972         u64 refs[BTRFS_MAX_LEVEL];
1973         int need_check[BTRFS_MAX_LEVEL];
1974 };
1975
1976 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1977                              struct node_refs *nrefs, u64 level);
1978 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1979                             unsigned int ext_ref);
1980
1981 /*
1982  * Returns >0  Found error, not fatal, should continue
1983  * Returns <0  Fatal error, must exit the whole check
1984  * Returns 0   No errors found
1985  */
1986 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1987                                struct node_refs *nrefs, int *level, int ext_ref)
1988 {
1989         struct extent_buffer *cur = path->nodes[0];
1990         struct btrfs_key key;
1991         u64 cur_bytenr;
1992         u32 nritems;
1993         u64 first_ino = 0;
1994         int root_level = btrfs_header_level(root->node);
1995         int i;
1996         int ret = 0; /* Final return value */
1997         int err = 0; /* Positive error bitmap */
1998
1999         cur_bytenr = cur->start;
2000
2001         /* skip to first inode item or the first inode number change */
2002         nritems = btrfs_header_nritems(cur);
2003         for (i = 0; i < nritems; i++) {
2004                 btrfs_item_key_to_cpu(cur, &key, i);
2005                 if (i == 0)
2006                         first_ino = key.objectid;
2007                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2008                     (first_ino && first_ino != key.objectid))
2009                         break;
2010         }
2011         if (i == nritems) {
2012                 path->slots[0] = nritems;
2013                 return 0;
2014         }
2015         path->slots[0] = i;
2016
2017 again:
2018         err |= check_inode_item(root, path, ext_ref);
2019
2020         if (err & LAST_ITEM)
2021                 goto out;
2022
2023         /* still have inode items in thie leaf */
2024         if (cur->start == cur_bytenr)
2025                 goto again;
2026
2027         /*
2028          * we have switched to another leaf, above nodes may
2029          * have changed, here walk down the path, if a node
2030          * or leaf is shared, check whether we can skip this
2031          * node or leaf.
2032          */
2033         for (i = root_level; i >= 0; i--) {
2034                 if (path->nodes[i]->start == nrefs->bytenr[i])
2035                         continue;
2036
2037                 ret = update_nodes_refs(root,
2038                                 path->nodes[i]->start,
2039                                 nrefs, i);
2040                 if (ret)
2041                         goto out;
2042
2043                 if (!nrefs->need_check[i]) {
2044                         *level += 1;
2045                         break;
2046                 }
2047         }
2048
2049         for (i = 0; i < *level; i++) {
2050                 free_extent_buffer(path->nodes[i]);
2051                 path->nodes[i] = NULL;
2052         }
2053 out:
2054         err &= ~LAST_ITEM;
2055         if (err && !ret)
2056                 ret = err;
2057         return ret;
2058 }
2059
2060 static void reada_walk_down(struct btrfs_root *root,
2061                             struct extent_buffer *node, int slot)
2062 {
2063         struct btrfs_fs_info *fs_info = root->fs_info;
2064         u64 bytenr;
2065         u64 ptr_gen;
2066         u32 nritems;
2067         int i;
2068         int level;
2069
2070         level = btrfs_header_level(node);
2071         if (level != 1)
2072                 return;
2073
2074         nritems = btrfs_header_nritems(node);
2075         for (i = slot; i < nritems; i++) {
2076                 bytenr = btrfs_node_blockptr(node, i);
2077                 ptr_gen = btrfs_node_ptr_generation(node, i);
2078                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2079         }
2080 }
2081
2082 /*
2083  * Check the child node/leaf by the following condition:
2084  * 1. the first item key of the node/leaf should be the same with the one
2085  *    in parent.
2086  * 2. block in parent node should match the child node/leaf.
2087  * 3. generation of parent node and child's header should be consistent.
2088  *
2089  * Or the child node/leaf pointed by the key in parent is not valid.
2090  *
2091  * We hope to check leaf owner too, but since subvol may share leaves,
2092  * which makes leaf owner check not so strong, key check should be
2093  * sufficient enough for that case.
2094  */
2095 static int check_child_node(struct extent_buffer *parent, int slot,
2096                             struct extent_buffer *child)
2097 {
2098         struct btrfs_key parent_key;
2099         struct btrfs_key child_key;
2100         int ret = 0;
2101
2102         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2103         if (btrfs_header_level(child) == 0)
2104                 btrfs_item_key_to_cpu(child, &child_key, 0);
2105         else
2106                 btrfs_node_key_to_cpu(child, &child_key, 0);
2107
2108         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2109                 ret = -EINVAL;
2110                 fprintf(stderr,
2111                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2112                         parent_key.objectid, parent_key.type, parent_key.offset,
2113                         child_key.objectid, child_key.type, child_key.offset);
2114         }
2115         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2116                 ret = -EINVAL;
2117                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2118                         btrfs_node_blockptr(parent, slot),
2119                         btrfs_header_bytenr(child));
2120         }
2121         if (btrfs_node_ptr_generation(parent, slot) !=
2122             btrfs_header_generation(child)) {
2123                 ret = -EINVAL;
2124                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2125                         btrfs_header_generation(child),
2126                         btrfs_node_ptr_generation(parent, slot));
2127         }
2128         return ret;
2129 }
2130
2131 /*
2132  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2133  * in every fs or file tree check. Here we find its all root ids, and only check
2134  * it in the fs or file tree which has the smallest root id.
2135  */
2136 static int need_check(struct btrfs_root *root, struct ulist *roots)
2137 {
2138         struct rb_node *node;
2139         struct ulist_node *u;
2140
2141         if (roots->nnodes == 1)
2142                 return 1;
2143
2144         node = rb_first(&roots->root);
2145         u = rb_entry(node, struct ulist_node, rb_node);
2146         /*
2147          * current root id is not smallest, we skip it and let it be checked
2148          * in the fs or file tree who hash the smallest root id.
2149          */
2150         if (root->objectid != u->val)
2151                 return 0;
2152
2153         return 1;
2154 }
2155
2156 /*
2157  * for a tree node or leaf, we record its reference count, so later if we still
2158  * process this node or leaf, don't need to compute its reference count again.
2159  */
2160 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2161                              struct node_refs *nrefs, u64 level)
2162 {
2163         int check, ret;
2164         u64 refs;
2165         struct ulist *roots;
2166
2167         if (nrefs->bytenr[level] != bytenr) {
2168                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2169                                        level, 1, &refs, NULL);
2170                 if (ret < 0)
2171                         return ret;
2172
2173                 nrefs->bytenr[level] = bytenr;
2174                 nrefs->refs[level] = refs;
2175                 if (refs > 1) {
2176                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2177                                                    0, &roots);
2178                         if (ret)
2179                                 return -EIO;
2180
2181                         check = need_check(root, roots);
2182                         ulist_free(roots);
2183                         nrefs->need_check[level] = check;
2184                 } else {
2185                         nrefs->need_check[level] = 1;
2186                 }
2187         }
2188
2189         return 0;
2190 }
2191
2192 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2193                           struct walk_control *wc, int *level,
2194                           struct node_refs *nrefs)
2195 {
2196         enum btrfs_tree_block_status status;
2197         u64 bytenr;
2198         u64 ptr_gen;
2199         struct btrfs_fs_info *fs_info = root->fs_info;
2200         struct extent_buffer *next;
2201         struct extent_buffer *cur;
2202         int ret, err = 0;
2203         u64 refs;
2204
2205         WARN_ON(*level < 0);
2206         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2207
2208         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2209                 refs = nrefs->refs[*level];
2210                 ret = 0;
2211         } else {
2212                 ret = btrfs_lookup_extent_info(NULL, root,
2213                                        path->nodes[*level]->start,
2214                                        *level, 1, &refs, NULL);
2215                 if (ret < 0) {
2216                         err = ret;
2217                         goto out;
2218                 }
2219                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2220                 nrefs->refs[*level] = refs;
2221         }
2222
2223         if (refs > 1) {
2224                 ret = enter_shared_node(root, path->nodes[*level]->start,
2225                                         refs, wc, *level);
2226                 if (ret > 0) {
2227                         err = ret;
2228                         goto out;
2229                 }
2230         }
2231
2232         while (*level >= 0) {
2233                 WARN_ON(*level < 0);
2234                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2235                 cur = path->nodes[*level];
2236
2237                 if (btrfs_header_level(cur) != *level)
2238                         WARN_ON(1);
2239
2240                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2241                         break;
2242                 if (*level == 0) {
2243                         ret = process_one_leaf(root, cur, wc);
2244                         if (ret < 0)
2245                                 err = ret;
2246                         break;
2247                 }
2248                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2249                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2250
2251                 if (bytenr == nrefs->bytenr[*level - 1]) {
2252                         refs = nrefs->refs[*level - 1];
2253                 } else {
2254                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2255                                         *level - 1, 1, &refs, NULL);
2256                         if (ret < 0) {
2257                                 refs = 0;
2258                         } else {
2259                                 nrefs->bytenr[*level - 1] = bytenr;
2260                                 nrefs->refs[*level - 1] = refs;
2261                         }
2262                 }
2263
2264                 if (refs > 1) {
2265                         ret = enter_shared_node(root, bytenr, refs,
2266                                                 wc, *level - 1);
2267                         if (ret > 0) {
2268                                 path->slots[*level]++;
2269                                 continue;
2270                         }
2271                 }
2272
2273                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2274                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2275                         free_extent_buffer(next);
2276                         reada_walk_down(root, cur, path->slots[*level]);
2277                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2278                         if (!extent_buffer_uptodate(next)) {
2279                                 struct btrfs_key node_key;
2280
2281                                 btrfs_node_key_to_cpu(path->nodes[*level],
2282                                                       &node_key,
2283                                                       path->slots[*level]);
2284                                 btrfs_add_corrupt_extent_record(root->fs_info,
2285                                                 &node_key,
2286                                                 path->nodes[*level]->start,
2287                                                 root->fs_info->nodesize,
2288                                                 *level);
2289                                 err = -EIO;
2290                                 goto out;
2291                         }
2292                 }
2293
2294                 ret = check_child_node(cur, path->slots[*level], next);
2295                 if (ret) {
2296                         free_extent_buffer(next);
2297                         err = ret;
2298                         goto out;
2299                 }
2300
2301                 if (btrfs_is_leaf(next))
2302                         status = btrfs_check_leaf(root, NULL, next);
2303                 else
2304                         status = btrfs_check_node(root, NULL, next);
2305                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2306                         free_extent_buffer(next);
2307                         err = -EIO;
2308                         goto out;
2309                 }
2310
2311                 *level = *level - 1;
2312                 free_extent_buffer(path->nodes[*level]);
2313                 path->nodes[*level] = next;
2314                 path->slots[*level] = 0;
2315         }
2316 out:
2317         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2318         return err;
2319 }
2320
2321 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2322                             unsigned int ext_ref);
2323
2324 /*
2325  * Returns >0  Found error, should continue
2326  * Returns <0  Fatal error, must exit the whole check
2327  * Returns 0   No errors found
2328  */
2329 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2330                              int *level, struct node_refs *nrefs, int ext_ref)
2331 {
2332         enum btrfs_tree_block_status status;
2333         u64 bytenr;
2334         u64 ptr_gen;
2335         struct btrfs_fs_info *fs_info = root->fs_info;
2336         struct extent_buffer *next;
2337         struct extent_buffer *cur;
2338         int ret;
2339
2340         WARN_ON(*level < 0);
2341         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2342
2343         ret = update_nodes_refs(root, path->nodes[*level]->start,
2344                                 nrefs, *level);
2345         if (ret < 0)
2346                 return ret;
2347
2348         while (*level >= 0) {
2349                 WARN_ON(*level < 0);
2350                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2351                 cur = path->nodes[*level];
2352
2353                 if (btrfs_header_level(cur) != *level)
2354                         WARN_ON(1);
2355
2356                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2357                         break;
2358                 /* Don't forgot to check leaf/node validation */
2359                 if (*level == 0) {
2360                         ret = btrfs_check_leaf(root, NULL, cur);
2361                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2362                                 ret = -EIO;
2363                                 break;
2364                         }
2365                         ret = process_one_leaf_v2(root, path, nrefs,
2366                                                   level, ext_ref);
2367                         break;
2368                 } else {
2369                         ret = btrfs_check_node(root, NULL, cur);
2370                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2371                                 ret = -EIO;
2372                                 break;
2373                         }
2374                 }
2375                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2376                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2377
2378                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2379                 if (ret)
2380                         break;
2381                 if (!nrefs->need_check[*level - 1]) {
2382                         path->slots[*level]++;
2383                         continue;
2384                 }
2385
2386                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2387                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2388                         free_extent_buffer(next);
2389                         reada_walk_down(root, cur, path->slots[*level]);
2390                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2391                         if (!extent_buffer_uptodate(next)) {
2392                                 struct btrfs_key node_key;
2393
2394                                 btrfs_node_key_to_cpu(path->nodes[*level],
2395                                                       &node_key,
2396                                                       path->slots[*level]);
2397                                 btrfs_add_corrupt_extent_record(fs_info,
2398                                                 &node_key,
2399                                                 path->nodes[*level]->start,
2400                                                 fs_info->nodesize,
2401                                                 *level);
2402                                 ret = -EIO;
2403                                 break;
2404                         }
2405                 }
2406
2407                 ret = check_child_node(cur, path->slots[*level], next);
2408                 if (ret < 0) 
2409                         break;
2410
2411                 if (btrfs_is_leaf(next))
2412                         status = btrfs_check_leaf(root, NULL, next);
2413                 else
2414                         status = btrfs_check_node(root, NULL, next);
2415                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2416                         free_extent_buffer(next);
2417                         ret = -EIO;
2418                         break;
2419                 }
2420
2421                 *level = *level - 1;
2422                 free_extent_buffer(path->nodes[*level]);
2423                 path->nodes[*level] = next;
2424                 path->slots[*level] = 0;
2425         }
2426         return ret;
2427 }
2428
2429 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2430                         struct walk_control *wc, int *level)
2431 {
2432         int i;
2433         struct extent_buffer *leaf;
2434
2435         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2436                 leaf = path->nodes[i];
2437                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2438                         path->slots[i]++;
2439                         *level = i;
2440                         return 0;
2441                 } else {
2442                         free_extent_buffer(path->nodes[*level]);
2443                         path->nodes[*level] = NULL;
2444                         BUG_ON(*level > wc->active_node);
2445                         if (*level == wc->active_node)
2446                                 leave_shared_node(root, wc, *level);
2447                         *level = i + 1;
2448                 }
2449         }
2450         return 1;
2451 }
2452
2453 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2454                            int *level)
2455 {
2456         int i;
2457         struct extent_buffer *leaf;
2458
2459         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2460                 leaf = path->nodes[i];
2461                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2462                         path->slots[i]++;
2463                         *level = i;
2464                         return 0;
2465                 } else {
2466                         free_extent_buffer(path->nodes[*level]);
2467                         path->nodes[*level] = NULL;
2468                         *level = i + 1;
2469                 }
2470         }
2471         return 1;
2472 }
2473
2474 static int check_root_dir(struct inode_record *rec)
2475 {
2476         struct inode_backref *backref;
2477         int ret = -1;
2478
2479         if (!rec->found_inode_item || rec->errors)
2480                 goto out;
2481         if (rec->nlink != 1 || rec->found_link != 0)
2482                 goto out;
2483         if (list_empty(&rec->backrefs))
2484                 goto out;
2485         backref = to_inode_backref(rec->backrefs.next);
2486         if (!backref->found_inode_ref)
2487                 goto out;
2488         if (backref->index != 0 || backref->namelen != 2 ||
2489             memcmp(backref->name, "..", 2))
2490                 goto out;
2491         if (backref->found_dir_index || backref->found_dir_item)
2492                 goto out;
2493         ret = 0;
2494 out:
2495         return ret;
2496 }
2497
2498 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2499                               struct btrfs_root *root, struct btrfs_path *path,
2500                               struct inode_record *rec)
2501 {
2502         struct btrfs_inode_item *ei;
2503         struct btrfs_key key;
2504         int ret;
2505
2506         key.objectid = rec->ino;
2507         key.type = BTRFS_INODE_ITEM_KEY;
2508         key.offset = (u64)-1;
2509
2510         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2511         if (ret < 0)
2512                 goto out;
2513         if (ret) {
2514                 if (!path->slots[0]) {
2515                         ret = -ENOENT;
2516                         goto out;
2517                 }
2518                 path->slots[0]--;
2519                 ret = 0;
2520         }
2521         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2522         if (key.objectid != rec->ino) {
2523                 ret = -ENOENT;
2524                 goto out;
2525         }
2526
2527         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2528                             struct btrfs_inode_item);
2529         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2530         btrfs_mark_buffer_dirty(path->nodes[0]);
2531         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2532         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2533                root->root_key.objectid);
2534 out:
2535         btrfs_release_path(path);
2536         return ret;
2537 }
2538
2539 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2540                                     struct btrfs_root *root,
2541                                     struct btrfs_path *path,
2542                                     struct inode_record *rec)
2543 {
2544         int ret;
2545
2546         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2547         btrfs_release_path(path);
2548         if (!ret)
2549                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2550         return ret;
2551 }
2552
2553 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2554                                struct btrfs_root *root,
2555                                struct btrfs_path *path,
2556                                struct inode_record *rec)
2557 {
2558         struct btrfs_inode_item *ei;
2559         struct btrfs_key key;
2560         int ret = 0;
2561
2562         key.objectid = rec->ino;
2563         key.type = BTRFS_INODE_ITEM_KEY;
2564         key.offset = 0;
2565
2566         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2567         if (ret) {
2568                 if (ret > 0)
2569                         ret = -ENOENT;
2570                 goto out;
2571         }
2572
2573         /* Since ret == 0, no need to check anything */
2574         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2575                             struct btrfs_inode_item);
2576         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2577         btrfs_mark_buffer_dirty(path->nodes[0]);
2578         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2579         printf("reset nbytes for ino %llu root %llu\n",
2580                rec->ino, root->root_key.objectid);
2581 out:
2582         btrfs_release_path(path);
2583         return ret;
2584 }
2585
2586 static int add_missing_dir_index(struct btrfs_root *root,
2587                                  struct cache_tree *inode_cache,
2588                                  struct inode_record *rec,
2589                                  struct inode_backref *backref)
2590 {
2591         struct btrfs_path path;
2592         struct btrfs_trans_handle *trans;
2593         struct btrfs_dir_item *dir_item;
2594         struct extent_buffer *leaf;
2595         struct btrfs_key key;
2596         struct btrfs_disk_key disk_key;
2597         struct inode_record *dir_rec;
2598         unsigned long name_ptr;
2599         u32 data_size = sizeof(*dir_item) + backref->namelen;
2600         int ret;
2601
2602         trans = btrfs_start_transaction(root, 1);
2603         if (IS_ERR(trans))
2604                 return PTR_ERR(trans);
2605
2606         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2607                 (unsigned long long)rec->ino);
2608
2609         btrfs_init_path(&path);
2610         key.objectid = backref->dir;
2611         key.type = BTRFS_DIR_INDEX_KEY;
2612         key.offset = backref->index;
2613         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2614         BUG_ON(ret);
2615
2616         leaf = path.nodes[0];
2617         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2618
2619         disk_key.objectid = cpu_to_le64(rec->ino);
2620         disk_key.type = BTRFS_INODE_ITEM_KEY;
2621         disk_key.offset = 0;
2622
2623         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2624         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2625         btrfs_set_dir_data_len(leaf, dir_item, 0);
2626         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2627         name_ptr = (unsigned long)(dir_item + 1);
2628         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2629         btrfs_mark_buffer_dirty(leaf);
2630         btrfs_release_path(&path);
2631         btrfs_commit_transaction(trans, root);
2632
2633         backref->found_dir_index = 1;
2634         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2635         BUG_ON(IS_ERR(dir_rec));
2636         if (!dir_rec)
2637                 return 0;
2638         dir_rec->found_size += backref->namelen;
2639         if (dir_rec->found_size == dir_rec->isize &&
2640             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2641                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2642         if (dir_rec->found_size != dir_rec->isize)
2643                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2644
2645         return 0;
2646 }
2647
2648 static int delete_dir_index(struct btrfs_root *root,
2649                             struct inode_backref *backref)
2650 {
2651         struct btrfs_trans_handle *trans;
2652         struct btrfs_dir_item *di;
2653         struct btrfs_path path;
2654         int ret = 0;
2655
2656         trans = btrfs_start_transaction(root, 1);
2657         if (IS_ERR(trans))
2658                 return PTR_ERR(trans);
2659
2660         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2661                 (unsigned long long)backref->dir,
2662                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2663                 (unsigned long long)root->objectid);
2664
2665         btrfs_init_path(&path);
2666         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2667                                     backref->name, backref->namelen,
2668                                     backref->index, -1);
2669         if (IS_ERR(di)) {
2670                 ret = PTR_ERR(di);
2671                 btrfs_release_path(&path);
2672                 btrfs_commit_transaction(trans, root);
2673                 if (ret == -ENOENT)
2674                         return 0;
2675                 return ret;
2676         }
2677
2678         if (!di)
2679                 ret = btrfs_del_item(trans, root, &path);
2680         else
2681                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2682         BUG_ON(ret);
2683         btrfs_release_path(&path);
2684         btrfs_commit_transaction(trans, root);
2685         return ret;
2686 }
2687
2688 static int create_inode_item(struct btrfs_root *root,
2689                              struct inode_record *rec,
2690                              int root_dir)
2691 {
2692         struct btrfs_trans_handle *trans;
2693         struct btrfs_inode_item inode_item;
2694         time_t now = time(NULL);
2695         int ret;
2696
2697         trans = btrfs_start_transaction(root, 1);
2698         if (IS_ERR(trans)) {
2699                 ret = PTR_ERR(trans);
2700                 return ret;
2701         }
2702
2703         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2704                 "be incomplete, please check permissions and content after "
2705                 "the fsck completes.\n", (unsigned long long)root->objectid,
2706                 (unsigned long long)rec->ino);
2707
2708         memset(&inode_item, 0, sizeof(inode_item));
2709         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2710         if (root_dir)
2711                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2712         else
2713                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2714         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2715         if (rec->found_dir_item) {
2716                 if (rec->found_file_extent)
2717                         fprintf(stderr, "root %llu inode %llu has both a dir "
2718                                 "item and extents, unsure if it is a dir or a "
2719                                 "regular file so setting it as a directory\n",
2720                                 (unsigned long long)root->objectid,
2721                                 (unsigned long long)rec->ino);
2722                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2723                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2724         } else if (!rec->found_dir_item) {
2725                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2726                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2727         }
2728         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2729         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2730         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2731         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2732         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2733         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2734         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2735         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2736
2737         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2738         BUG_ON(ret);
2739         btrfs_commit_transaction(trans, root);
2740         return 0;
2741 }
2742
2743 static int repair_inode_backrefs(struct btrfs_root *root,
2744                                  struct inode_record *rec,
2745                                  struct cache_tree *inode_cache,
2746                                  int delete)
2747 {
2748         struct inode_backref *tmp, *backref;
2749         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2750         int ret = 0;
2751         int repaired = 0;
2752
2753         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2754                 if (!delete && rec->ino == root_dirid) {
2755                         if (!rec->found_inode_item) {
2756                                 ret = create_inode_item(root, rec, 1);
2757                                 if (ret)
2758                                         break;
2759                                 repaired++;
2760                         }
2761                 }
2762
2763                 /* Index 0 for root dir's are special, don't mess with it */
2764                 if (rec->ino == root_dirid && backref->index == 0)
2765                         continue;
2766
2767                 if (delete &&
2768                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2769                      (backref->found_dir_index && backref->found_inode_ref &&
2770                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2771                         ret = delete_dir_index(root, backref);
2772                         if (ret)
2773                                 break;
2774                         repaired++;
2775                         list_del(&backref->list);
2776                         free(backref);
2777                         continue;
2778                 }
2779
2780                 if (!delete && !backref->found_dir_index &&
2781                     backref->found_dir_item && backref->found_inode_ref) {
2782                         ret = add_missing_dir_index(root, inode_cache, rec,
2783                                                     backref);
2784                         if (ret)
2785                                 break;
2786                         repaired++;
2787                         if (backref->found_dir_item &&
2788                             backref->found_dir_index) {
2789                                 if (!backref->errors &&
2790                                     backref->found_inode_ref) {
2791                                         list_del(&backref->list);
2792                                         free(backref);
2793                                         continue;
2794                                 }
2795                         }
2796                 }
2797
2798                 if (!delete && (!backref->found_dir_index &&
2799                                 !backref->found_dir_item &&
2800                                 backref->found_inode_ref)) {
2801                         struct btrfs_trans_handle *trans;
2802                         struct btrfs_key location;
2803
2804                         ret = check_dir_conflict(root, backref->name,
2805                                                  backref->namelen,
2806                                                  backref->dir,
2807                                                  backref->index);
2808                         if (ret) {
2809                                 /*
2810                                  * let nlink fixing routine to handle it,
2811                                  * which can do it better.
2812                                  */
2813                                 ret = 0;
2814                                 break;
2815                         }
2816                         location.objectid = rec->ino;
2817                         location.type = BTRFS_INODE_ITEM_KEY;
2818                         location.offset = 0;
2819
2820                         trans = btrfs_start_transaction(root, 1);
2821                         if (IS_ERR(trans)) {
2822                                 ret = PTR_ERR(trans);
2823                                 break;
2824                         }
2825                         fprintf(stderr, "adding missing dir index/item pair "
2826                                 "for inode %llu\n",
2827                                 (unsigned long long)rec->ino);
2828                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2829                                                     backref->namelen,
2830                                                     backref->dir, &location,
2831                                                     imode_to_type(rec->imode),
2832                                                     backref->index);
2833                         BUG_ON(ret);
2834                         btrfs_commit_transaction(trans, root);
2835                         repaired++;
2836                 }
2837
2838                 if (!delete && (backref->found_inode_ref &&
2839                                 backref->found_dir_index &&
2840                                 backref->found_dir_item &&
2841                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2842                                 !rec->found_inode_item)) {
2843                         ret = create_inode_item(root, rec, 0);
2844                         if (ret)
2845                                 break;
2846                         repaired++;
2847                 }
2848
2849         }
2850         return ret ? ret : repaired;
2851 }
2852
2853 /*
2854  * To determine the file type for nlink/inode_item repair
2855  *
2856  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2857  * Return -ENOENT if file type is not found.
2858  */
2859 static int find_file_type(struct inode_record *rec, u8 *type)
2860 {
2861         struct inode_backref *backref;
2862
2863         /* For inode item recovered case */
2864         if (rec->found_inode_item) {
2865                 *type = imode_to_type(rec->imode);
2866                 return 0;
2867         }
2868
2869         list_for_each_entry(backref, &rec->backrefs, list) {
2870                 if (backref->found_dir_index || backref->found_dir_item) {
2871                         *type = backref->filetype;
2872                         return 0;
2873                 }
2874         }
2875         return -ENOENT;
2876 }
2877
2878 /*
2879  * To determine the file name for nlink repair
2880  *
2881  * Return 0 if file name is found, set name and namelen.
2882  * Return -ENOENT if file name is not found.
2883  */
2884 static int find_file_name(struct inode_record *rec,
2885                           char *name, int *namelen)
2886 {
2887         struct inode_backref *backref;
2888
2889         list_for_each_entry(backref, &rec->backrefs, list) {
2890                 if (backref->found_dir_index || backref->found_dir_item ||
2891                     backref->found_inode_ref) {
2892                         memcpy(name, backref->name, backref->namelen);
2893                         *namelen = backref->namelen;
2894                         return 0;
2895                 }
2896         }
2897         return -ENOENT;
2898 }
2899
2900 /* Reset the nlink of the inode to the correct one */
2901 static int reset_nlink(struct btrfs_trans_handle *trans,
2902                        struct btrfs_root *root,
2903                        struct btrfs_path *path,
2904                        struct inode_record *rec)
2905 {
2906         struct inode_backref *backref;
2907         struct inode_backref *tmp;
2908         struct btrfs_key key;
2909         struct btrfs_inode_item *inode_item;
2910         int ret = 0;
2911
2912         /* We don't believe this either, reset it and iterate backref */
2913         rec->found_link = 0;
2914
2915         /* Remove all backref including the valid ones */
2916         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2917                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2918                                    backref->index, backref->name,
2919                                    backref->namelen, 0);
2920                 if (ret < 0)
2921                         goto out;
2922
2923                 /* remove invalid backref, so it won't be added back */
2924                 if (!(backref->found_dir_index &&
2925                       backref->found_dir_item &&
2926                       backref->found_inode_ref)) {
2927                         list_del(&backref->list);
2928                         free(backref);
2929                 } else {
2930                         rec->found_link++;
2931                 }
2932         }
2933
2934         /* Set nlink to 0 */
2935         key.objectid = rec->ino;
2936         key.type = BTRFS_INODE_ITEM_KEY;
2937         key.offset = 0;
2938         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2939         if (ret < 0)
2940                 goto out;
2941         if (ret > 0) {
2942                 ret = -ENOENT;
2943                 goto out;
2944         }
2945         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2946                                     struct btrfs_inode_item);
2947         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2948         btrfs_mark_buffer_dirty(path->nodes[0]);
2949         btrfs_release_path(path);
2950
2951         /*
2952          * Add back valid inode_ref/dir_item/dir_index,
2953          * add_link() will handle the nlink inc, so new nlink must be correct
2954          */
2955         list_for_each_entry(backref, &rec->backrefs, list) {
2956                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2957                                      backref->name, backref->namelen,
2958                                      backref->filetype, &backref->index, 1);
2959                 if (ret < 0)
2960                         goto out;
2961         }
2962 out:
2963         btrfs_release_path(path);
2964         return ret;
2965 }
2966
2967 static int get_highest_inode(struct btrfs_trans_handle *trans,
2968                                 struct btrfs_root *root,
2969                                 struct btrfs_path *path,
2970                                 u64 *highest_ino)
2971 {
2972         struct btrfs_key key, found_key;
2973         int ret;
2974
2975         btrfs_init_path(path);
2976         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2977         key.offset = -1;
2978         key.type = BTRFS_INODE_ITEM_KEY;
2979         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2980         if (ret == 1) {
2981                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2982                                 path->slots[0] - 1);
2983                 *highest_ino = found_key.objectid;
2984                 ret = 0;
2985         }
2986         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2987                 ret = -EOVERFLOW;
2988         btrfs_release_path(path);
2989         return ret;
2990 }
2991
2992 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2993                                struct btrfs_root *root,
2994                                struct btrfs_path *path,
2995                                struct inode_record *rec)
2996 {
2997         char *dir_name = "lost+found";
2998         char namebuf[BTRFS_NAME_LEN] = {0};
2999         u64 lost_found_ino;
3000         u32 mode = 0700;
3001         u8 type = 0;
3002         int namelen = 0;
3003         int name_recovered = 0;
3004         int type_recovered = 0;
3005         int ret = 0;
3006
3007         /*
3008          * Get file name and type first before these invalid inode ref
3009          * are deleted by remove_all_invalid_backref()
3010          */
3011         name_recovered = !find_file_name(rec, namebuf, &namelen);
3012         type_recovered = !find_file_type(rec, &type);
3013
3014         if (!name_recovered) {
3015                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3016                        rec->ino, rec->ino);
3017                 namelen = count_digits(rec->ino);
3018                 sprintf(namebuf, "%llu", rec->ino);
3019                 name_recovered = 1;
3020         }
3021         if (!type_recovered) {
3022                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3023                        rec->ino);
3024                 type = BTRFS_FT_REG_FILE;
3025                 type_recovered = 1;
3026         }
3027
3028         ret = reset_nlink(trans, root, path, rec);
3029         if (ret < 0) {
3030                 fprintf(stderr,
3031                         "Failed to reset nlink for inode %llu: %s\n",
3032                         rec->ino, strerror(-ret));
3033                 goto out;
3034         }
3035
3036         if (rec->found_link == 0) {
3037                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3038                 if (ret < 0)
3039                         goto out;
3040                 lost_found_ino++;
3041                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3042                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3043                                   mode);
3044                 if (ret < 0) {
3045                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
3046                                 dir_name, strerror(-ret));
3047                         goto out;
3048                 }
3049                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3050                                      namebuf, namelen, type, NULL, 1);
3051                 /*
3052                  * Add ".INO" suffix several times to handle case where
3053                  * "FILENAME.INO" is already taken by another file.
3054                  */
3055                 while (ret == -EEXIST) {
3056                         /*
3057                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
3058                          */
3059                         if (namelen + count_digits(rec->ino) + 1 >
3060                             BTRFS_NAME_LEN) {
3061                                 ret = -EFBIG;
3062                                 goto out;
3063                         }
3064                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3065                                  ".%llu", rec->ino);
3066                         namelen += count_digits(rec->ino) + 1;
3067                         ret = btrfs_add_link(trans, root, rec->ino,
3068                                              lost_found_ino, namebuf,
3069                                              namelen, type, NULL, 1);
3070                 }
3071                 if (ret < 0) {
3072                         fprintf(stderr,
3073                                 "Failed to link the inode %llu to %s dir: %s\n",
3074                                 rec->ino, dir_name, strerror(-ret));
3075                         goto out;
3076                 }
3077                 /*
3078                  * Just increase the found_link, don't actually add the
3079                  * backref. This will make things easier and this inode
3080                  * record will be freed after the repair is done.
3081                  * So fsck will not report problem about this inode.
3082                  */
3083                 rec->found_link++;
3084                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3085                        namelen, namebuf, dir_name);
3086         }
3087         printf("Fixed the nlink of inode %llu\n", rec->ino);
3088 out:
3089         /*
3090          * Clear the flag anyway, or we will loop forever for the same inode
3091          * as it will not be removed from the bad inode list and the dead loop
3092          * happens.
3093          */
3094         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3095         btrfs_release_path(path);
3096         return ret;
3097 }
3098
3099 /*
3100  * Check if there is any normal(reg or prealloc) file extent for given
3101  * ino.
3102  * This is used to determine the file type when neither its dir_index/item or
3103  * inode_item exists.
3104  *
3105  * This will *NOT* report error, if any error happens, just consider it does
3106  * not have any normal file extent.
3107  */
3108 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3109 {
3110         struct btrfs_path path;
3111         struct btrfs_key key;
3112         struct btrfs_key found_key;
3113         struct btrfs_file_extent_item *fi;
3114         u8 type;
3115         int ret = 0;
3116
3117         btrfs_init_path(&path);
3118         key.objectid = ino;
3119         key.type = BTRFS_EXTENT_DATA_KEY;
3120         key.offset = 0;
3121
3122         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3123         if (ret < 0) {
3124                 ret = 0;
3125                 goto out;
3126         }
3127         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3128                 ret = btrfs_next_leaf(root, &path);
3129                 if (ret) {
3130                         ret = 0;
3131                         goto out;
3132                 }
3133         }
3134         while (1) {
3135                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3136                                       path.slots[0]);
3137                 if (found_key.objectid != ino ||
3138                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3139                         break;
3140                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3141                                     struct btrfs_file_extent_item);
3142                 type = btrfs_file_extent_type(path.nodes[0], fi);
3143                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3144                         ret = 1;
3145                         goto out;
3146                 }
3147         }
3148 out:
3149         btrfs_release_path(&path);
3150         return ret;
3151 }
3152
3153 static u32 btrfs_type_to_imode(u8 type)
3154 {
3155         static u32 imode_by_btrfs_type[] = {
3156                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3157                 [BTRFS_FT_DIR]          = S_IFDIR,
3158                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3159                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3160                 [BTRFS_FT_FIFO]         = S_IFIFO,
3161                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3162                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3163         };
3164
3165         return imode_by_btrfs_type[(type)];
3166 }
3167
3168 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3169                                 struct btrfs_root *root,
3170                                 struct btrfs_path *path,
3171                                 struct inode_record *rec)
3172 {
3173         u8 filetype;
3174         u32 mode = 0700;
3175         int type_recovered = 0;
3176         int ret = 0;
3177
3178         printf("Trying to rebuild inode:%llu\n", rec->ino);
3179
3180         type_recovered = !find_file_type(rec, &filetype);
3181
3182         /*
3183          * Try to determine inode type if type not found.
3184          *
3185          * For found regular file extent, it must be FILE.
3186          * For found dir_item/index, it must be DIR.
3187          *
3188          * For undetermined one, use FILE as fallback.
3189          *
3190          * TODO:
3191          * 1. If found backref(inode_index/item is already handled) to it,
3192          *    it must be DIR.
3193          *    Need new inode-inode ref structure to allow search for that.
3194          */
3195         if (!type_recovered) {
3196                 if (rec->found_file_extent &&
3197                     find_normal_file_extent(root, rec->ino)) {
3198                         type_recovered = 1;
3199                         filetype = BTRFS_FT_REG_FILE;
3200                 } else if (rec->found_dir_item) {
3201                         type_recovered = 1;
3202                         filetype = BTRFS_FT_DIR;
3203                 } else if (!list_empty(&rec->orphan_extents)) {
3204                         type_recovered = 1;
3205                         filetype = BTRFS_FT_REG_FILE;
3206                 } else{
3207                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3208                                rec->ino);
3209                         type_recovered = 1;
3210                         filetype = BTRFS_FT_REG_FILE;
3211                 }
3212         }
3213
3214         ret = btrfs_new_inode(trans, root, rec->ino,
3215                               mode | btrfs_type_to_imode(filetype));
3216         if (ret < 0)
3217                 goto out;
3218
3219         /*
3220          * Here inode rebuild is done, we only rebuild the inode item,
3221          * don't repair the nlink(like move to lost+found).
3222          * That is the job of nlink repair.
3223          *
3224          * We just fill the record and return
3225          */
3226         rec->found_dir_item = 1;
3227         rec->imode = mode | btrfs_type_to_imode(filetype);
3228         rec->nlink = 0;
3229         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3230         /* Ensure the inode_nlinks repair function will be called */
3231         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3232 out:
3233         return ret;
3234 }
3235
3236 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3237                                       struct btrfs_root *root,
3238                                       struct btrfs_path *path,
3239                                       struct inode_record *rec)
3240 {
3241         struct orphan_data_extent *orphan;
3242         struct orphan_data_extent *tmp;
3243         int ret = 0;
3244
3245         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3246                 /*
3247                  * Check for conflicting file extents
3248                  *
3249                  * Here we don't know whether the extents is compressed or not,
3250                  * so we can only assume it not compressed nor data offset,
3251                  * and use its disk_len as extent length.
3252                  */
3253                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3254                                        orphan->offset, orphan->disk_len, 0);
3255                 btrfs_release_path(path);
3256                 if (ret < 0)
3257                         goto out;
3258                 if (!ret) {
3259                         fprintf(stderr,
3260                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3261                                 orphan->disk_bytenr, orphan->disk_len);
3262                         ret = btrfs_free_extent(trans,
3263                                         root->fs_info->extent_root,
3264                                         orphan->disk_bytenr, orphan->disk_len,
3265                                         0, root->objectid, orphan->objectid,
3266                                         orphan->offset);
3267                         if (ret < 0)
3268                                 goto out;
3269                 }
3270                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3271                                 orphan->offset, orphan->disk_bytenr,
3272                                 orphan->disk_len, orphan->disk_len);
3273                 if (ret < 0)
3274                         goto out;
3275
3276                 /* Update file size info */
3277                 rec->found_size += orphan->disk_len;
3278                 if (rec->found_size == rec->nbytes)
3279                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3280
3281                 /* Update the file extent hole info too */
3282                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3283                                            orphan->disk_len);
3284                 if (ret < 0)
3285                         goto out;
3286                 if (RB_EMPTY_ROOT(&rec->holes))
3287                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3288
3289                 list_del(&orphan->list);
3290                 free(orphan);
3291         }
3292         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3293 out:
3294         return ret;
3295 }
3296
3297 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3298                                         struct btrfs_root *root,
3299                                         struct btrfs_path *path,
3300                                         struct inode_record *rec)
3301 {
3302         struct rb_node *node;
3303         struct file_extent_hole *hole;
3304         int found = 0;
3305         int ret = 0;
3306
3307         node = rb_first(&rec->holes);
3308
3309         while (node) {
3310                 found = 1;
3311                 hole = rb_entry(node, struct file_extent_hole, node);
3312                 ret = btrfs_punch_hole(trans, root, rec->ino,
3313                                        hole->start, hole->len);
3314                 if (ret < 0)
3315                         goto out;
3316                 ret = del_file_extent_hole(&rec->holes, hole->start,
3317                                            hole->len);
3318                 if (ret < 0)
3319                         goto out;
3320                 if (RB_EMPTY_ROOT(&rec->holes))
3321                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3322                 node = rb_first(&rec->holes);
3323         }
3324         /* special case for a file losing all its file extent */
3325         if (!found) {
3326                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3327                                        round_up(rec->isize,
3328                                                 root->fs_info->sectorsize));
3329                 if (ret < 0)
3330                         goto out;
3331         }
3332         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3333                rec->ino, root->objectid);
3334 out:
3335         return ret;
3336 }
3337
3338 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3339 {
3340         struct btrfs_trans_handle *trans;
3341         struct btrfs_path path;
3342         int ret = 0;
3343
3344         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3345                              I_ERR_NO_ORPHAN_ITEM |
3346                              I_ERR_LINK_COUNT_WRONG |
3347                              I_ERR_NO_INODE_ITEM |
3348                              I_ERR_FILE_EXTENT_ORPHAN |
3349                              I_ERR_FILE_EXTENT_DISCOUNT|
3350                              I_ERR_FILE_NBYTES_WRONG)))
3351                 return rec->errors;
3352
3353         /*
3354          * For nlink repair, it may create a dir and add link, so
3355          * 2 for parent(256)'s dir_index and dir_item
3356          * 2 for lost+found dir's inode_item and inode_ref
3357          * 1 for the new inode_ref of the file
3358          * 2 for lost+found dir's dir_index and dir_item for the file
3359          */
3360         trans = btrfs_start_transaction(root, 7);
3361         if (IS_ERR(trans))
3362                 return PTR_ERR(trans);
3363
3364         btrfs_init_path(&path);
3365         if (rec->errors & I_ERR_NO_INODE_ITEM)
3366                 ret = repair_inode_no_item(trans, root, &path, rec);
3367         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3368                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3369         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3370                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3371         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3372                 ret = repair_inode_isize(trans, root, &path, rec);
3373         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3374                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3375         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3376                 ret = repair_inode_nlinks(trans, root, &path, rec);
3377         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3378                 ret = repair_inode_nbytes(trans, root, &path, rec);
3379         btrfs_commit_transaction(trans, root);
3380         btrfs_release_path(&path);
3381         return ret;
3382 }
3383
3384 static int check_inode_recs(struct btrfs_root *root,
3385                             struct cache_tree *inode_cache)
3386 {
3387         struct cache_extent *cache;
3388         struct ptr_node *node;
3389         struct inode_record *rec;
3390         struct inode_backref *backref;
3391         int stage = 0;
3392         int ret = 0;
3393         int err = 0;
3394         u64 error = 0;
3395         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3396
3397         if (btrfs_root_refs(&root->root_item) == 0) {
3398                 if (!cache_tree_empty(inode_cache))
3399                         fprintf(stderr, "warning line %d\n", __LINE__);
3400                 return 0;
3401         }
3402
3403         /*
3404          * We need to repair backrefs first because we could change some of the
3405          * errors in the inode recs.
3406          *
3407          * We also need to go through and delete invalid backrefs first and then
3408          * add the correct ones second.  We do this because we may get EEXIST
3409          * when adding back the correct index because we hadn't yet deleted the
3410          * invalid index.
3411          *
3412          * For example, if we were missing a dir index then the directories
3413          * isize would be wrong, so if we fixed the isize to what we thought it
3414          * would be and then fixed the backref we'd still have a invalid fs, so
3415          * we need to add back the dir index and then check to see if the isize
3416          * is still wrong.
3417          */
3418         while (stage < 3) {
3419                 stage++;
3420                 if (stage == 3 && !err)
3421                         break;
3422
3423                 cache = search_cache_extent(inode_cache, 0);
3424                 while (repair && cache) {
3425                         node = container_of(cache, struct ptr_node, cache);
3426                         rec = node->data;
3427                         cache = next_cache_extent(cache);
3428
3429                         /* Need to free everything up and rescan */
3430                         if (stage == 3) {
3431                                 remove_cache_extent(inode_cache, &node->cache);
3432                                 free(node);
3433                                 free_inode_rec(rec);
3434                                 continue;
3435                         }
3436
3437                         if (list_empty(&rec->backrefs))
3438                                 continue;
3439
3440                         ret = repair_inode_backrefs(root, rec, inode_cache,
3441                                                     stage == 1);
3442                         if (ret < 0) {
3443                                 err = ret;
3444                                 stage = 2;
3445                                 break;
3446                         } if (ret > 0) {
3447                                 err = -EAGAIN;
3448                         }
3449                 }
3450         }
3451         if (err)
3452                 return err;
3453
3454         rec = get_inode_rec(inode_cache, root_dirid, 0);
3455         BUG_ON(IS_ERR(rec));
3456         if (rec) {
3457                 ret = check_root_dir(rec);
3458                 if (ret) {
3459                         fprintf(stderr, "root %llu root dir %llu error\n",
3460                                 (unsigned long long)root->root_key.objectid,
3461                                 (unsigned long long)root_dirid);
3462                         print_inode_error(root, rec);
3463                         error++;
3464                 }
3465         } else {
3466                 if (repair) {
3467                         struct btrfs_trans_handle *trans;
3468
3469                         trans = btrfs_start_transaction(root, 1);
3470                         if (IS_ERR(trans)) {
3471                                 err = PTR_ERR(trans);
3472                                 return err;
3473                         }
3474
3475                         fprintf(stderr,
3476                                 "root %llu missing its root dir, recreating\n",
3477                                 (unsigned long long)root->objectid);
3478
3479                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3480                         BUG_ON(ret);
3481
3482                         btrfs_commit_transaction(trans, root);
3483                         return -EAGAIN;
3484                 }
3485
3486                 fprintf(stderr, "root %llu root dir %llu not found\n",
3487                         (unsigned long long)root->root_key.objectid,
3488                         (unsigned long long)root_dirid);
3489         }
3490
3491         while (1) {
3492                 cache = search_cache_extent(inode_cache, 0);
3493                 if (!cache)
3494                         break;
3495                 node = container_of(cache, struct ptr_node, cache);
3496                 rec = node->data;
3497                 remove_cache_extent(inode_cache, &node->cache);
3498                 free(node);
3499                 if (rec->ino == root_dirid ||
3500                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3501                         free_inode_rec(rec);
3502                         continue;
3503                 }
3504
3505                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3506                         ret = check_orphan_item(root, rec->ino);
3507                         if (ret == 0)
3508                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3509                         if (can_free_inode_rec(rec)) {
3510                                 free_inode_rec(rec);
3511                                 continue;
3512                         }
3513                 }
3514
3515                 if (!rec->found_inode_item)
3516                         rec->errors |= I_ERR_NO_INODE_ITEM;
3517                 if (rec->found_link != rec->nlink)
3518                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3519                 if (repair) {
3520                         ret = try_repair_inode(root, rec);
3521                         if (ret == 0 && can_free_inode_rec(rec)) {
3522                                 free_inode_rec(rec);
3523                                 continue;
3524                         }
3525                         ret = 0;
3526                 }
3527
3528                 if (!(repair && ret == 0))
3529                         error++;
3530                 print_inode_error(root, rec);
3531                 list_for_each_entry(backref, &rec->backrefs, list) {
3532                         if (!backref->found_dir_item)
3533                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3534                         if (!backref->found_dir_index)
3535                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3536                         if (!backref->found_inode_ref)
3537                                 backref->errors |= REF_ERR_NO_INODE_REF;
3538                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3539                                 " namelen %u name %s filetype %d errors %x",
3540                                 (unsigned long long)backref->dir,
3541                                 (unsigned long long)backref->index,
3542                                 backref->namelen, backref->name,
3543                                 backref->filetype, backref->errors);
3544                         print_ref_error(backref->errors);
3545                 }
3546                 free_inode_rec(rec);
3547         }
3548         return (error > 0) ? -1 : 0;
3549 }
3550
3551 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3552                                         u64 objectid)
3553 {
3554         struct cache_extent *cache;
3555         struct root_record *rec = NULL;
3556         int ret;
3557
3558         cache = lookup_cache_extent(root_cache, objectid, 1);
3559         if (cache) {
3560                 rec = container_of(cache, struct root_record, cache);
3561         } else {
3562                 rec = calloc(1, sizeof(*rec));
3563                 if (!rec)
3564                         return ERR_PTR(-ENOMEM);
3565                 rec->objectid = objectid;
3566                 INIT_LIST_HEAD(&rec->backrefs);
3567                 rec->cache.start = objectid;
3568                 rec->cache.size = 1;
3569
3570                 ret = insert_cache_extent(root_cache, &rec->cache);
3571                 if (ret)
3572                         return ERR_PTR(-EEXIST);
3573         }
3574         return rec;
3575 }
3576
3577 static struct root_backref *get_root_backref(struct root_record *rec,
3578                                              u64 ref_root, u64 dir, u64 index,
3579                                              const char *name, int namelen)
3580 {
3581         struct root_backref *backref;
3582
3583         list_for_each_entry(backref, &rec->backrefs, list) {
3584                 if (backref->ref_root != ref_root || backref->dir != dir ||
3585                     backref->namelen != namelen)
3586                         continue;
3587                 if (memcmp(name, backref->name, namelen))
3588                         continue;
3589                 return backref;
3590         }
3591
3592         backref = calloc(1, sizeof(*backref) + namelen + 1);
3593         if (!backref)
3594                 return NULL;
3595         backref->ref_root = ref_root;
3596         backref->dir = dir;
3597         backref->index = index;
3598         backref->namelen = namelen;
3599         memcpy(backref->name, name, namelen);
3600         backref->name[namelen] = '\0';
3601         list_add_tail(&backref->list, &rec->backrefs);
3602         return backref;
3603 }
3604
3605 static void free_root_record(struct cache_extent *cache)
3606 {
3607         struct root_record *rec;
3608         struct root_backref *backref;
3609
3610         rec = container_of(cache, struct root_record, cache);
3611         while (!list_empty(&rec->backrefs)) {
3612                 backref = to_root_backref(rec->backrefs.next);
3613                 list_del(&backref->list);
3614                 free(backref);
3615         }
3616
3617         free(rec);
3618 }
3619
3620 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3621
3622 static int add_root_backref(struct cache_tree *root_cache,
3623                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3624                             const char *name, int namelen,
3625                             int item_type, int errors)
3626 {
3627         struct root_record *rec;
3628         struct root_backref *backref;
3629
3630         rec = get_root_rec(root_cache, root_id);
3631         BUG_ON(IS_ERR(rec));
3632         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3633         BUG_ON(!backref);
3634
3635         backref->errors |= errors;
3636
3637         if (item_type != BTRFS_DIR_ITEM_KEY) {
3638                 if (backref->found_dir_index || backref->found_back_ref ||
3639                     backref->found_forward_ref) {
3640                         if (backref->index != index)
3641                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3642                 } else {
3643                         backref->index = index;
3644                 }
3645         }
3646
3647         if (item_type == BTRFS_DIR_ITEM_KEY) {
3648                 if (backref->found_forward_ref)
3649                         rec->found_ref++;
3650                 backref->found_dir_item = 1;
3651         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3652                 backref->found_dir_index = 1;
3653         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3654                 if (backref->found_forward_ref)
3655                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3656                 else if (backref->found_dir_item)
3657                         rec->found_ref++;
3658                 backref->found_forward_ref = 1;
3659         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3660                 if (backref->found_back_ref)
3661                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3662                 backref->found_back_ref = 1;
3663         } else {
3664                 BUG_ON(1);
3665         }
3666
3667         if (backref->found_forward_ref && backref->found_dir_item)
3668                 backref->reachable = 1;
3669         return 0;
3670 }
3671
3672 static int merge_root_recs(struct btrfs_root *root,
3673                            struct cache_tree *src_cache,
3674                            struct cache_tree *dst_cache)
3675 {
3676         struct cache_extent *cache;
3677         struct ptr_node *node;
3678         struct inode_record *rec;
3679         struct inode_backref *backref;
3680         int ret = 0;
3681
3682         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3683                 free_inode_recs_tree(src_cache);
3684                 return 0;
3685         }
3686
3687         while (1) {
3688                 cache = search_cache_extent(src_cache, 0);
3689                 if (!cache)
3690                         break;
3691                 node = container_of(cache, struct ptr_node, cache);
3692                 rec = node->data;
3693                 remove_cache_extent(src_cache, &node->cache);
3694                 free(node);
3695
3696                 ret = is_child_root(root, root->objectid, rec->ino);
3697                 if (ret < 0)
3698                         break;
3699                 else if (ret == 0)
3700                         goto skip;
3701
3702                 list_for_each_entry(backref, &rec->backrefs, list) {
3703                         BUG_ON(backref->found_inode_ref);
3704                         if (backref->found_dir_item)
3705                                 add_root_backref(dst_cache, rec->ino,
3706                                         root->root_key.objectid, backref->dir,
3707                                         backref->index, backref->name,
3708                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3709                                         backref->errors);
3710                         if (backref->found_dir_index)
3711                                 add_root_backref(dst_cache, rec->ino,
3712                                         root->root_key.objectid, backref->dir,
3713                                         backref->index, backref->name,
3714                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3715                                         backref->errors);
3716                 }
3717 skip:
3718                 free_inode_rec(rec);
3719         }
3720         if (ret < 0)
3721                 return ret;
3722         return 0;
3723 }
3724
3725 static int check_root_refs(struct btrfs_root *root,
3726                            struct cache_tree *root_cache)
3727 {
3728         struct root_record *rec;
3729         struct root_record *ref_root;
3730         struct root_backref *backref;
3731         struct cache_extent *cache;
3732         int loop = 1;
3733         int ret;
3734         int error;
3735         int errors = 0;
3736
3737         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3738         BUG_ON(IS_ERR(rec));
3739         rec->found_ref = 1;
3740
3741         /* fixme: this can not detect circular references */
3742         while (loop) {
3743                 loop = 0;
3744                 cache = search_cache_extent(root_cache, 0);
3745                 while (1) {
3746                         if (!cache)
3747                                 break;
3748                         rec = container_of(cache, struct root_record, cache);
3749                         cache = next_cache_extent(cache);
3750
3751                         if (rec->found_ref == 0)
3752                                 continue;
3753
3754                         list_for_each_entry(backref, &rec->backrefs, list) {
3755                                 if (!backref->reachable)
3756                                         continue;
3757
3758                                 ref_root = get_root_rec(root_cache,
3759                                                         backref->ref_root);
3760                                 BUG_ON(IS_ERR(ref_root));
3761                                 if (ref_root->found_ref > 0)
3762                                         continue;
3763
3764                                 backref->reachable = 0;
3765                                 rec->found_ref--;
3766                                 if (rec->found_ref == 0)
3767                                         loop = 1;
3768                         }
3769                 }
3770         }
3771
3772         cache = search_cache_extent(root_cache, 0);
3773         while (1) {
3774                 if (!cache)
3775                         break;
3776                 rec = container_of(cache, struct root_record, cache);
3777                 cache = next_cache_extent(cache);
3778
3779                 if (rec->found_ref == 0 &&
3780                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3781                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3782                         ret = check_orphan_item(root->fs_info->tree_root,
3783                                                 rec->objectid);
3784                         if (ret == 0)
3785                                 continue;
3786
3787                         /*
3788                          * If we don't have a root item then we likely just have
3789                          * a dir item in a snapshot for this root but no actual
3790                          * ref key or anything so it's meaningless.
3791                          */
3792                         if (!rec->found_root_item)
3793                                 continue;
3794                         errors++;
3795                         fprintf(stderr, "fs tree %llu not referenced\n",
3796                                 (unsigned long long)rec->objectid);
3797                 }
3798
3799                 error = 0;
3800                 if (rec->found_ref > 0 && !rec->found_root_item)
3801                         error = 1;
3802                 list_for_each_entry(backref, &rec->backrefs, list) {
3803                         if (!backref->found_dir_item)
3804                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3805                         if (!backref->found_dir_index)
3806                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3807                         if (!backref->found_back_ref)
3808                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3809                         if (!backref->found_forward_ref)
3810                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3811                         if (backref->reachable && backref->errors)
3812                                 error = 1;
3813                 }
3814                 if (!error)
3815                         continue;
3816
3817                 errors++;
3818                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3819                         (unsigned long long)rec->objectid, rec->found_ref,
3820                          rec->found_root_item ? "" : "not found");
3821
3822                 list_for_each_entry(backref, &rec->backrefs, list) {
3823                         if (!backref->reachable)
3824                                 continue;
3825                         if (!backref->errors && rec->found_root_item)
3826                                 continue;
3827                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3828                                 " index %llu namelen %u name %s errors %x\n",
3829                                 (unsigned long long)backref->ref_root,
3830                                 (unsigned long long)backref->dir,
3831                                 (unsigned long long)backref->index,
3832                                 backref->namelen, backref->name,
3833                                 backref->errors);
3834                         print_ref_error(backref->errors);
3835                 }
3836         }
3837         return errors > 0 ? 1 : 0;
3838 }
3839
3840 static int process_root_ref(struct extent_buffer *eb, int slot,
3841                             struct btrfs_key *key,
3842                             struct cache_tree *root_cache)
3843 {
3844         u64 dirid;
3845         u64 index;
3846         u32 len;
3847         u32 name_len;
3848         struct btrfs_root_ref *ref;
3849         char namebuf[BTRFS_NAME_LEN];
3850         int error;
3851
3852         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3853
3854         dirid = btrfs_root_ref_dirid(eb, ref);
3855         index = btrfs_root_ref_sequence(eb, ref);
3856         name_len = btrfs_root_ref_name_len(eb, ref);
3857
3858         if (name_len <= BTRFS_NAME_LEN) {
3859                 len = name_len;
3860                 error = 0;
3861         } else {
3862                 len = BTRFS_NAME_LEN;
3863                 error = REF_ERR_NAME_TOO_LONG;
3864         }
3865         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3866
3867         if (key->type == BTRFS_ROOT_REF_KEY) {
3868                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3869                                  index, namebuf, len, key->type, error);
3870         } else {
3871                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3872                                  index, namebuf, len, key->type, error);
3873         }
3874         return 0;
3875 }
3876
3877 static void free_corrupt_block(struct cache_extent *cache)
3878 {
3879         struct btrfs_corrupt_block *corrupt;
3880
3881         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3882         free(corrupt);
3883 }
3884
3885 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3886
3887 /*
3888  * Repair the btree of the given root.
3889  *
3890  * The fix is to remove the node key in corrupt_blocks cache_tree.
3891  * and rebalance the tree.
3892  * After the fix, the btree should be writeable.
3893  */
3894 static int repair_btree(struct btrfs_root *root,
3895                         struct cache_tree *corrupt_blocks)
3896 {
3897         struct btrfs_trans_handle *trans;
3898         struct btrfs_path path;
3899         struct btrfs_corrupt_block *corrupt;
3900         struct cache_extent *cache;
3901         struct btrfs_key key;
3902         u64 offset;
3903         int level;
3904         int ret = 0;
3905
3906         if (cache_tree_empty(corrupt_blocks))
3907                 return 0;
3908
3909         trans = btrfs_start_transaction(root, 1);
3910         if (IS_ERR(trans)) {
3911                 ret = PTR_ERR(trans);
3912                 fprintf(stderr, "Error starting transaction: %s\n",
3913                         strerror(-ret));
3914                 return ret;
3915         }
3916         btrfs_init_path(&path);
3917         cache = first_cache_extent(corrupt_blocks);
3918         while (cache) {
3919                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3920                                        cache);
3921                 level = corrupt->level;
3922                 path.lowest_level = level;
3923                 key.objectid = corrupt->key.objectid;
3924                 key.type = corrupt->key.type;
3925                 key.offset = corrupt->key.offset;
3926
3927                 /*
3928                  * Here we don't want to do any tree balance, since it may
3929                  * cause a balance with corrupted brother leaf/node,
3930                  * so ins_len set to 0 here.
3931                  * Balance will be done after all corrupt node/leaf is deleted.
3932                  */
3933                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3934                 if (ret < 0)
3935                         goto out;
3936                 offset = btrfs_node_blockptr(path.nodes[level],
3937                                              path.slots[level]);
3938
3939                 /* Remove the ptr */
3940                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3941                 if (ret < 0)
3942                         goto out;
3943                 /*
3944                  * Remove the corresponding extent
3945                  * return value is not concerned.
3946                  */
3947                 btrfs_release_path(&path);
3948                 ret = btrfs_free_extent(trans, root, offset,
3949                                 root->fs_info->nodesize, 0,
3950                                 root->root_key.objectid, level - 1, 0);
3951                 cache = next_cache_extent(cache);
3952         }
3953
3954         /* Balance the btree using btrfs_search_slot() */
3955         cache = first_cache_extent(corrupt_blocks);
3956         while (cache) {
3957                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3958                                        cache);
3959                 memcpy(&key, &corrupt->key, sizeof(key));
3960                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3961                 if (ret < 0)
3962                         goto out;
3963                 /* return will always >0 since it won't find the item */
3964                 ret = 0;
3965                 btrfs_release_path(&path);
3966                 cache = next_cache_extent(cache);
3967         }
3968 out:
3969         btrfs_commit_transaction(trans, root);
3970         btrfs_release_path(&path);
3971         return ret;
3972 }
3973
3974 static int check_fs_root(struct btrfs_root *root,
3975                          struct cache_tree *root_cache,
3976                          struct walk_control *wc)
3977 {
3978         int ret = 0;
3979         int err = 0;
3980         int wret;
3981         int level;
3982         struct btrfs_path path;
3983         struct shared_node root_node;
3984         struct root_record *rec;
3985         struct btrfs_root_item *root_item = &root->root_item;
3986         struct cache_tree corrupt_blocks;
3987         struct orphan_data_extent *orphan;
3988         struct orphan_data_extent *tmp;
3989         enum btrfs_tree_block_status status;
3990         struct node_refs nrefs;
3991
3992         /*
3993          * Reuse the corrupt_block cache tree to record corrupted tree block
3994          *
3995          * Unlike the usage in extent tree check, here we do it in a per
3996          * fs/subvol tree base.
3997          */
3998         cache_tree_init(&corrupt_blocks);
3999         root->fs_info->corrupt_blocks = &corrupt_blocks;
4000
4001         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4002                 rec = get_root_rec(root_cache, root->root_key.objectid);
4003                 BUG_ON(IS_ERR(rec));
4004                 if (btrfs_root_refs(root_item) > 0)
4005                         rec->found_root_item = 1;
4006         }
4007
4008         btrfs_init_path(&path);
4009         memset(&root_node, 0, sizeof(root_node));
4010         cache_tree_init(&root_node.root_cache);
4011         cache_tree_init(&root_node.inode_cache);
4012         memset(&nrefs, 0, sizeof(nrefs));
4013
4014         /* Move the orphan extent record to corresponding inode_record */
4015         list_for_each_entry_safe(orphan, tmp,
4016                                  &root->orphan_data_extents, list) {
4017                 struct inode_record *inode;
4018
4019                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4020                                       1);
4021                 BUG_ON(IS_ERR(inode));
4022                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4023                 list_move(&orphan->list, &inode->orphan_extents);
4024         }
4025
4026         level = btrfs_header_level(root->node);
4027         memset(wc->nodes, 0, sizeof(wc->nodes));
4028         wc->nodes[level] = &root_node;
4029         wc->active_node = level;
4030         wc->root_level = level;
4031
4032         /* We may not have checked the root block, lets do that now */
4033         if (btrfs_is_leaf(root->node))
4034                 status = btrfs_check_leaf(root, NULL, root->node);
4035         else
4036                 status = btrfs_check_node(root, NULL, root->node);
4037         if (status != BTRFS_TREE_BLOCK_CLEAN)
4038                 return -EIO;
4039
4040         if (btrfs_root_refs(root_item) > 0 ||
4041             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4042                 path.nodes[level] = root->node;
4043                 extent_buffer_get(root->node);
4044                 path.slots[level] = 0;
4045         } else {
4046                 struct btrfs_key key;
4047                 struct btrfs_disk_key found_key;
4048
4049                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4050                 level = root_item->drop_level;
4051                 path.lowest_level = level;
4052                 if (level > btrfs_header_level(root->node) ||
4053                     level >= BTRFS_MAX_LEVEL) {
4054                         error("ignoring invalid drop level: %u", level);
4055                         goto skip_walking;
4056                 }
4057                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4058                 if (wret < 0)
4059                         goto skip_walking;
4060                 btrfs_node_key(path.nodes[level], &found_key,
4061                                 path.slots[level]);
4062                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4063                                         sizeof(found_key)));
4064         }
4065
4066         while (1) {
4067                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4068                 if (wret < 0)
4069                         ret = wret;
4070                 if (wret != 0)
4071                         break;
4072
4073                 wret = walk_up_tree(root, &path, wc, &level);
4074                 if (wret < 0)
4075                         ret = wret;
4076                 if (wret != 0)
4077                         break;
4078         }
4079 skip_walking:
4080         btrfs_release_path(&path);
4081
4082         if (!cache_tree_empty(&corrupt_blocks)) {
4083                 struct cache_extent *cache;
4084                 struct btrfs_corrupt_block *corrupt;
4085
4086                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4087                        root->root_key.objectid);
4088                 cache = first_cache_extent(&corrupt_blocks);
4089                 while (cache) {
4090                         corrupt = container_of(cache,
4091                                                struct btrfs_corrupt_block,
4092                                                cache);
4093                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4094                                cache->start, corrupt->level,
4095                                corrupt->key.objectid, corrupt->key.type,
4096                                corrupt->key.offset);
4097                         cache = next_cache_extent(cache);
4098                 }
4099                 if (repair) {
4100                         printf("Try to repair the btree for root %llu\n",
4101                                root->root_key.objectid);
4102                         ret = repair_btree(root, &corrupt_blocks);
4103                         if (ret < 0)
4104                                 fprintf(stderr, "Failed to repair btree: %s\n",
4105                                         strerror(-ret));
4106                         if (!ret)
4107                                 printf("Btree for root %llu is fixed\n",
4108                                        root->root_key.objectid);
4109                 }
4110         }
4111
4112         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4113         if (err < 0)
4114                 ret = err;
4115
4116         if (root_node.current) {
4117                 root_node.current->checked = 1;
4118                 maybe_free_inode_rec(&root_node.inode_cache,
4119                                 root_node.current);
4120         }
4121
4122         err = check_inode_recs(root, &root_node.inode_cache);
4123         if (!ret)
4124                 ret = err;
4125
4126         free_corrupt_blocks_tree(&corrupt_blocks);
4127         root->fs_info->corrupt_blocks = NULL;
4128         free_orphan_data_extents(&root->orphan_data_extents);
4129         return ret;
4130 }
4131
4132 static int fs_root_objectid(u64 objectid)
4133 {
4134         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4135             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4136                 return 1;
4137         return is_fstree(objectid);
4138 }
4139
4140 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4141                           struct cache_tree *root_cache)
4142 {
4143         struct btrfs_path path;
4144         struct btrfs_key key;
4145         struct walk_control wc;
4146         struct extent_buffer *leaf, *tree_node;
4147         struct btrfs_root *tmp_root;
4148         struct btrfs_root *tree_root = fs_info->tree_root;
4149         int ret;
4150         int err = 0;
4151
4152         if (ctx.progress_enabled) {
4153                 ctx.tp = TASK_FS_ROOTS;
4154                 task_start(ctx.info);
4155         }
4156
4157         /*
4158          * Just in case we made any changes to the extent tree that weren't
4159          * reflected into the free space cache yet.
4160          */
4161         if (repair)
4162                 reset_cached_block_groups(fs_info);
4163         memset(&wc, 0, sizeof(wc));
4164         cache_tree_init(&wc.shared);
4165         btrfs_init_path(&path);
4166
4167 again:
4168         key.offset = 0;
4169         key.objectid = 0;
4170         key.type = BTRFS_ROOT_ITEM_KEY;
4171         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4172         if (ret < 0) {
4173                 err = 1;
4174                 goto out;
4175         }
4176         tree_node = tree_root->node;
4177         while (1) {
4178                 if (tree_node != tree_root->node) {
4179                         free_root_recs_tree(root_cache);
4180                         btrfs_release_path(&path);
4181                         goto again;
4182                 }
4183                 leaf = path.nodes[0];
4184                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4185                         ret = btrfs_next_leaf(tree_root, &path);
4186                         if (ret) {
4187                                 if (ret < 0)
4188                                         err = 1;
4189                                 break;
4190                         }
4191                         leaf = path.nodes[0];
4192                 }
4193                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4194                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4195                     fs_root_objectid(key.objectid)) {
4196                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4197                                 tmp_root = btrfs_read_fs_root_no_cache(
4198                                                 fs_info, &key);
4199                         } else {
4200                                 key.offset = (u64)-1;
4201                                 tmp_root = btrfs_read_fs_root(
4202                                                 fs_info, &key);
4203                         }
4204                         if (IS_ERR(tmp_root)) {
4205                                 err = 1;
4206                                 goto next;
4207                         }
4208                         ret = check_fs_root(tmp_root, root_cache, &wc);
4209                         if (ret == -EAGAIN) {
4210                                 free_root_recs_tree(root_cache);
4211                                 btrfs_release_path(&path);
4212                                 goto again;
4213                         }
4214                         if (ret)
4215                                 err = 1;
4216                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4217                                 btrfs_free_fs_root(tmp_root);
4218                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4219                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4220                         process_root_ref(leaf, path.slots[0], &key,
4221                                          root_cache);
4222                 }
4223 next:
4224                 path.slots[0]++;
4225         }
4226 out:
4227         btrfs_release_path(&path);
4228         if (err)
4229                 free_extent_cache_tree(&wc.shared);
4230         if (!cache_tree_empty(&wc.shared))
4231                 fprintf(stderr, "warning line %d\n", __LINE__);
4232
4233         task_stop(ctx.info);
4234
4235         return err;
4236 }
4237
4238 /*
4239  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4240  * INODE_REF/INODE_EXTREF match.
4241  *
4242  * @root:       the root of the fs/file tree
4243  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4244  * @key:        the key of the DIR_ITEM/DIR_INDEX
4245  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4246  *              distinguish root_dir between normal dir/file
4247  * @name:       the name in the INODE_REF/INODE_EXTREF
4248  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4249  * @mode:       the st_mode of INODE_ITEM
4250  *
4251  * Return 0 if no error occurred.
4252  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4253  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4254  * dir/file.
4255  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4256  * not match for normal dir/file.
4257  */
4258 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4259                          struct btrfs_key *key, u64 index, char *name,
4260                          u32 namelen, u32 mode)
4261 {
4262         struct btrfs_path path;
4263         struct extent_buffer *node;
4264         struct btrfs_dir_item *di;
4265         struct btrfs_key location;
4266         char namebuf[BTRFS_NAME_LEN] = {0};
4267         u32 total;
4268         u32 cur = 0;
4269         u32 len;
4270         u32 name_len;
4271         u32 data_len;
4272         u8 filetype;
4273         int slot;
4274         int ret;
4275
4276         btrfs_init_path(&path);
4277         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4278         if (ret < 0) {
4279                 ret = DIR_ITEM_MISSING;
4280                 goto out;
4281         }
4282
4283         /* Process root dir and goto out*/
4284         if (index == 0) {
4285                 if (ret == 0) {
4286                         ret = ROOT_DIR_ERROR;
4287                         error(
4288                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4289                                 root->objectid,
4290                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4291                                         "REF" : "EXTREF",
4292                                 ref_key->objectid, ref_key->offset,
4293                                 key->type == BTRFS_DIR_ITEM_KEY ?
4294                                         "DIR_ITEM" : "DIR_INDEX");
4295                 } else {
4296                         ret = 0;
4297                 }
4298
4299                 goto out;
4300         }
4301
4302         /* Process normal file/dir */
4303         if (ret > 0) {
4304                 ret = DIR_ITEM_MISSING;
4305                 error(
4306                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4307                         root->objectid,
4308                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4309                         ref_key->objectid, ref_key->offset,
4310                         key->type == BTRFS_DIR_ITEM_KEY ?
4311                                 "DIR_ITEM" : "DIR_INDEX",
4312                         key->objectid, key->offset, namelen, name,
4313                         imode_to_type(mode));
4314                 goto out;
4315         }
4316
4317         /* Check whether inode_id/filetype/name match */
4318         node = path.nodes[0];
4319         slot = path.slots[0];
4320         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4321         total = btrfs_item_size_nr(node, slot);
4322         while (cur < total) {
4323                 ret = DIR_ITEM_MISMATCH;
4324                 name_len = btrfs_dir_name_len(node, di);
4325                 data_len = btrfs_dir_data_len(node, di);
4326
4327                 btrfs_dir_item_key_to_cpu(node, di, &location);
4328                 if (location.objectid != ref_key->objectid ||
4329                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4330                     location.offset != 0)
4331                         goto next;
4332
4333                 filetype = btrfs_dir_type(node, di);
4334                 if (imode_to_type(mode) != filetype)
4335                         goto next;
4336
4337                 if (cur + sizeof(*di) + name_len > total ||
4338                     name_len > BTRFS_NAME_LEN) {
4339                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4340                                 root->objectid,
4341                                 key->type == BTRFS_DIR_ITEM_KEY ?
4342                                 "DIR_ITEM" : "DIR_INDEX",
4343                                 key->objectid, key->offset, name_len);
4344
4345                         if (cur + sizeof(*di) > total)
4346                                 break;
4347                         len = min_t(u32, total - cur - sizeof(*di),
4348                                     BTRFS_NAME_LEN);
4349                 } else {
4350                         len = name_len;
4351                 }
4352
4353                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4354                 if (len != namelen || strncmp(namebuf, name, len))
4355                         goto next;
4356
4357                 ret = 0;
4358                 goto out;
4359 next:
4360                 len = sizeof(*di) + name_len + data_len;
4361                 di = (struct btrfs_dir_item *)((char *)di + len);
4362                 cur += len;
4363         }
4364         if (ret == DIR_ITEM_MISMATCH)
4365                 error(
4366                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4367                         root->objectid,
4368                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4369                         ref_key->objectid, ref_key->offset,
4370                         key->type == BTRFS_DIR_ITEM_KEY ?
4371                                 "DIR_ITEM" : "DIR_INDEX",
4372                         key->objectid, key->offset, namelen, name,
4373                         imode_to_type(mode));
4374 out:
4375         btrfs_release_path(&path);
4376         return ret;
4377 }
4378
4379 /*
4380  * Traverse the given INODE_REF and call find_dir_item() to find related
4381  * DIR_ITEM/DIR_INDEX.
4382  *
4383  * @root:       the root of the fs/file tree
4384  * @ref_key:    the key of the INODE_REF
4385  * @refs:       the count of INODE_REF
4386  * @mode:       the st_mode of INODE_ITEM
4387  *
4388  * Return 0 if no error occurred.
4389  */
4390 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4391                            struct extent_buffer *node, int slot, u64 *refs,
4392                            int mode)
4393 {
4394         struct btrfs_key key;
4395         struct btrfs_inode_ref *ref;
4396         char namebuf[BTRFS_NAME_LEN] = {0};
4397         u32 total;
4398         u32 cur = 0;
4399         u32 len;
4400         u32 name_len;
4401         u64 index;
4402         int ret, err = 0;
4403
4404         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4405         total = btrfs_item_size_nr(node, slot);
4406
4407 next:
4408         /* Update inode ref count */
4409         (*refs)++;
4410
4411         index = btrfs_inode_ref_index(node, ref);
4412         name_len = btrfs_inode_ref_name_len(node, ref);
4413         if (cur + sizeof(*ref) + name_len > total ||
4414             name_len > BTRFS_NAME_LEN) {
4415                 warning("root %llu INODE_REF[%llu %llu] name too long",
4416                         root->objectid, ref_key->objectid, ref_key->offset);
4417
4418                 if (total < cur + sizeof(*ref))
4419                         goto out;
4420                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4421         } else {
4422                 len = name_len;
4423         }
4424
4425         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4426
4427         /* Check root dir ref name */
4428         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4429                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4430                       root->objectid, ref_key->objectid, ref_key->offset,
4431                       namebuf);
4432                 err |= ROOT_DIR_ERROR;
4433         }
4434
4435         /* Find related DIR_INDEX */
4436         key.objectid = ref_key->offset;
4437         key.type = BTRFS_DIR_INDEX_KEY;
4438         key.offset = index;
4439         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4440         err |= ret;
4441
4442         /* Find related dir_item */
4443         key.objectid = ref_key->offset;
4444         key.type = BTRFS_DIR_ITEM_KEY;
4445         key.offset = btrfs_name_hash(namebuf, len);
4446         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4447         err |= ret;
4448
4449         len = sizeof(*ref) + name_len;
4450         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4451         cur += len;
4452         if (cur < total)
4453                 goto next;
4454
4455 out:
4456         return err;
4457 }
4458
4459 /*
4460  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4461  * DIR_ITEM/DIR_INDEX.
4462  *
4463  * @root:       the root of the fs/file tree
4464  * @ref_key:    the key of the INODE_EXTREF
4465  * @refs:       the count of INODE_EXTREF
4466  * @mode:       the st_mode of INODE_ITEM
4467  *
4468  * Return 0 if no error occurred.
4469  */
4470 static int check_inode_extref(struct btrfs_root *root,
4471                               struct btrfs_key *ref_key,
4472                               struct extent_buffer *node, int slot, u64 *refs,
4473                               int mode)
4474 {
4475         struct btrfs_key key;
4476         struct btrfs_inode_extref *extref;
4477         char namebuf[BTRFS_NAME_LEN] = {0};
4478         u32 total;
4479         u32 cur = 0;
4480         u32 len;
4481         u32 name_len;
4482         u64 index;
4483         u64 parent;
4484         int ret;
4485         int err = 0;
4486
4487         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4488         total = btrfs_item_size_nr(node, slot);
4489
4490 next:
4491         /* update inode ref count */
4492         (*refs)++;
4493         name_len = btrfs_inode_extref_name_len(node, extref);
4494         index = btrfs_inode_extref_index(node, extref);
4495         parent = btrfs_inode_extref_parent(node, extref);
4496         if (name_len <= BTRFS_NAME_LEN) {
4497                 len = name_len;
4498         } else {
4499                 len = BTRFS_NAME_LEN;
4500                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4501                         root->objectid, ref_key->objectid, ref_key->offset);
4502         }
4503         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4504
4505         /* Check root dir ref name */
4506         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4507                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4508                       root->objectid, ref_key->objectid, ref_key->offset,
4509                       namebuf);
4510                 err |= ROOT_DIR_ERROR;
4511         }
4512
4513         /* find related dir_index */
4514         key.objectid = parent;
4515         key.type = BTRFS_DIR_INDEX_KEY;
4516         key.offset = index;
4517         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4518         err |= ret;
4519
4520         /* find related dir_item */
4521         key.objectid = parent;
4522         key.type = BTRFS_DIR_ITEM_KEY;
4523         key.offset = btrfs_name_hash(namebuf, len);
4524         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4525         err |= ret;
4526
4527         len = sizeof(*extref) + name_len;
4528         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4529         cur += len;
4530
4531         if (cur < total)
4532                 goto next;
4533
4534         return err;
4535 }
4536
4537 /*
4538  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4539  * DIR_ITEM/DIR_INDEX match.
4540  *
4541  * @root:       the root of the fs/file tree
4542  * @key:        the key of the INODE_REF/INODE_EXTREF
4543  * @name:       the name in the INODE_REF/INODE_EXTREF
4544  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4545  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4546  * to (u64)-1
4547  * @ext_ref:    the EXTENDED_IREF feature
4548  *
4549  * Return 0 if no error occurred.
4550  * Return >0 for error bitmap
4551  */
4552 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4553                           char *name, int namelen, u64 index,
4554                           unsigned int ext_ref)
4555 {
4556         struct btrfs_path path;
4557         struct btrfs_inode_ref *ref;
4558         struct btrfs_inode_extref *extref;
4559         struct extent_buffer *node;
4560         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4561         u32 total;
4562         u32 cur = 0;
4563         u32 len;
4564         u32 ref_namelen;
4565         u64 ref_index;
4566         u64 parent;
4567         u64 dir_id;
4568         int slot;
4569         int ret;
4570
4571         btrfs_init_path(&path);
4572         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4573         if (ret) {
4574                 ret = INODE_REF_MISSING;
4575                 goto extref;
4576         }
4577
4578         node = path.nodes[0];
4579         slot = path.slots[0];
4580
4581         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4582         total = btrfs_item_size_nr(node, slot);
4583
4584         /* Iterate all entry of INODE_REF */
4585         while (cur < total) {
4586                 ret = INODE_REF_MISSING;
4587
4588                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4589                 ref_index = btrfs_inode_ref_index(node, ref);
4590                 if (index != (u64)-1 && index != ref_index)
4591                         goto next_ref;
4592
4593                 if (cur + sizeof(*ref) + ref_namelen > total ||
4594                     ref_namelen > BTRFS_NAME_LEN) {
4595                         warning("root %llu INODE %s[%llu %llu] name too long",
4596                                 root->objectid,
4597                                 key->type == BTRFS_INODE_REF_KEY ?
4598                                         "REF" : "EXTREF",
4599                                 key->objectid, key->offset);
4600
4601                         if (cur + sizeof(*ref) > total)
4602                                 break;
4603                         len = min_t(u32, total - cur - sizeof(*ref),
4604                                     BTRFS_NAME_LEN);
4605                 } else {
4606                         len = ref_namelen;
4607                 }
4608
4609                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4610                                    len);
4611
4612                 if (len != namelen || strncmp(ref_namebuf, name, len))
4613                         goto next_ref;
4614
4615                 ret = 0;
4616                 goto out;
4617 next_ref:
4618                 len = sizeof(*ref) + ref_namelen;
4619                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4620                 cur += len;
4621         }
4622
4623 extref:
4624         /* Skip if not support EXTENDED_IREF feature */
4625         if (!ext_ref)
4626                 goto out;
4627
4628         btrfs_release_path(&path);
4629         btrfs_init_path(&path);
4630
4631         dir_id = key->offset;
4632         key->type = BTRFS_INODE_EXTREF_KEY;
4633         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4634
4635         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4636         if (ret) {
4637                 ret = INODE_REF_MISSING;
4638                 goto out;
4639         }
4640
4641         node = path.nodes[0];
4642         slot = path.slots[0];
4643
4644         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4645         cur = 0;
4646         total = btrfs_item_size_nr(node, slot);
4647
4648         /* Iterate all entry of INODE_EXTREF */
4649         while (cur < total) {
4650                 ret = INODE_REF_MISSING;
4651
4652                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4653                 ref_index = btrfs_inode_extref_index(node, extref);
4654                 parent = btrfs_inode_extref_parent(node, extref);
4655                 if (index != (u64)-1 && index != ref_index)
4656                         goto next_extref;
4657
4658                 if (parent != dir_id)
4659                         goto next_extref;
4660
4661                 if (ref_namelen <= BTRFS_NAME_LEN) {
4662                         len = ref_namelen;
4663                 } else {
4664                         len = BTRFS_NAME_LEN;
4665                         warning("root %llu INODE %s[%llu %llu] name too long",
4666                                 root->objectid,
4667                                 key->type == BTRFS_INODE_REF_KEY ?
4668                                         "REF" : "EXTREF",
4669                                 key->objectid, key->offset);
4670                 }
4671                 read_extent_buffer(node, ref_namebuf,
4672                                    (unsigned long)(extref + 1), len);
4673
4674                 if (len != namelen || strncmp(ref_namebuf, name, len))
4675                         goto next_extref;
4676
4677                 ret = 0;
4678                 goto out;
4679
4680 next_extref:
4681                 len = sizeof(*extref) + ref_namelen;
4682                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4683                 cur += len;
4684
4685         }
4686 out:
4687         btrfs_release_path(&path);
4688         return ret;
4689 }
4690
4691 /*
4692  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4693  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4694  *
4695  * @root:       the root of the fs/file tree
4696  * @key:        the key of the INODE_REF/INODE_EXTREF
4697  * @size:       the st_size of the INODE_ITEM
4698  * @ext_ref:    the EXTENDED_IREF feature
4699  *
4700  * Return 0 if no error occurred.
4701  */
4702 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4703                           struct extent_buffer *node, int slot, u64 *size,
4704                           unsigned int ext_ref)
4705 {
4706         struct btrfs_dir_item *di;
4707         struct btrfs_inode_item *ii;
4708         struct btrfs_path path;
4709         struct btrfs_key location;
4710         char namebuf[BTRFS_NAME_LEN] = {0};
4711         u32 total;
4712         u32 cur = 0;
4713         u32 len;
4714         u32 name_len;
4715         u32 data_len;
4716         u8 filetype;
4717         u32 mode;
4718         u64 index;
4719         int ret;
4720         int err = 0;
4721
4722         /*
4723          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4724          * ignore index check.
4725          */
4726         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4727
4728         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4729         total = btrfs_item_size_nr(node, slot);
4730
4731         while (cur < total) {
4732                 data_len = btrfs_dir_data_len(node, di);
4733                 if (data_len)
4734                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4735                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4736                               "DIR_ITEM" : "DIR_INDEX",
4737                               key->objectid, key->offset, data_len);
4738
4739                 name_len = btrfs_dir_name_len(node, di);
4740                 if (cur + sizeof(*di) + name_len > total ||
4741                     name_len > BTRFS_NAME_LEN) {
4742                         warning("root %llu %s[%llu %llu] name too long",
4743                                 root->objectid,
4744                                 key->type == BTRFS_DIR_ITEM_KEY ?
4745                                 "DIR_ITEM" : "DIR_INDEX",
4746                                 key->objectid, key->offset);
4747
4748                         if (cur + sizeof(*di) > total)
4749                                 break;
4750                         len = min_t(u32, total - cur - sizeof(*di),
4751                                     BTRFS_NAME_LEN);
4752                 } else {
4753                         len = name_len;
4754                 }
4755                 (*size) += name_len;
4756
4757                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4758                 filetype = btrfs_dir_type(node, di);
4759
4760                 if (key->type == BTRFS_DIR_ITEM_KEY &&
4761                     key->offset != btrfs_name_hash(namebuf, len)) {
4762                         err |= -EIO;
4763                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4764                                 root->objectid, key->objectid, key->offset,
4765                                 namebuf, len, filetype, key->offset,
4766                                 btrfs_name_hash(namebuf, len));
4767                 }
4768
4769                 btrfs_init_path(&path);
4770                 btrfs_dir_item_key_to_cpu(node, di, &location);
4771
4772                 /* Ignore related ROOT_ITEM check */
4773                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4774                         goto next;
4775
4776                 /* Check relative INODE_ITEM(existence/filetype) */
4777                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4778                 if (ret) {
4779                         err |= INODE_ITEM_MISSING;
4780                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4781                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4782                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4783                               key->offset, location.objectid, name_len,
4784                               namebuf, filetype);
4785                         goto next;
4786                 }
4787
4788                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4789                                     struct btrfs_inode_item);
4790                 mode = btrfs_inode_mode(path.nodes[0], ii);
4791
4792                 if (imode_to_type(mode) != filetype) {
4793                         err |= INODE_ITEM_MISMATCH;
4794                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4795                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4796                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4797                               key->offset, name_len, namebuf, filetype);
4798                 }
4799
4800                 /* Check relative INODE_REF/INODE_EXTREF */
4801                 location.type = BTRFS_INODE_REF_KEY;
4802                 location.offset = key->objectid;
4803                 ret = find_inode_ref(root, &location, namebuf, len,
4804                                        index, ext_ref);
4805                 err |= ret;
4806                 if (ret & INODE_REF_MISSING)
4807                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4808                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4809                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4810                               key->offset, name_len, namebuf, filetype);
4811
4812 next:
4813                 btrfs_release_path(&path);
4814                 len = sizeof(*di) + name_len + data_len;
4815                 di = (struct btrfs_dir_item *)((char *)di + len);
4816                 cur += len;
4817
4818                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4819                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4820                               root->objectid, key->objectid, key->offset);
4821                         break;
4822                 }
4823         }
4824
4825         return err;
4826 }
4827
4828 /*
4829  * Check file extent datasum/hole, update the size of the file extents,
4830  * check and update the last offset of the file extent.
4831  *
4832  * @root:       the root of fs/file tree.
4833  * @fkey:       the key of the file extent.
4834  * @nodatasum:  INODE_NODATASUM feature.
4835  * @size:       the sum of all EXTENT_DATA items size for this inode.
4836  * @end:        the offset of the last extent.
4837  *
4838  * Return 0 if no error occurred.
4839  */
4840 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4841                              struct extent_buffer *node, int slot,
4842                              unsigned int nodatasum, u64 *size, u64 *end)
4843 {
4844         struct btrfs_file_extent_item *fi;
4845         u64 disk_bytenr;
4846         u64 disk_num_bytes;
4847         u64 extent_num_bytes;
4848         u64 extent_offset;
4849         u64 csum_found;         /* In byte size, sectorsize aligned */
4850         u64 search_start;       /* Logical range start we search for csum */
4851         u64 search_len;         /* Logical range len we search for csum */
4852         unsigned int extent_type;
4853         unsigned int is_hole;
4854         int compressed = 0;
4855         int ret;
4856         int err = 0;
4857
4858         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4859
4860         /* Check inline extent */
4861         extent_type = btrfs_file_extent_type(node, fi);
4862         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4863                 struct btrfs_item *e = btrfs_item_nr(slot);
4864                 u32 item_inline_len;
4865
4866                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4867                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4868                 compressed = btrfs_file_extent_compression(node, fi);
4869                 if (extent_num_bytes == 0) {
4870                         error(
4871                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4872                                 root->objectid, fkey->objectid, fkey->offset);
4873                         err |= FILE_EXTENT_ERROR;
4874                 }
4875                 if (!compressed && extent_num_bytes != item_inline_len) {
4876                         error(
4877                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4878                                 root->objectid, fkey->objectid, fkey->offset,
4879                                 extent_num_bytes, item_inline_len);
4880                         err |= FILE_EXTENT_ERROR;
4881                 }
4882                 *end += extent_num_bytes;
4883                 *size += extent_num_bytes;
4884                 return err;
4885         }
4886
4887         /* Check extent type */
4888         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4889                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4890                 err |= FILE_EXTENT_ERROR;
4891                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4892                       root->objectid, fkey->objectid, fkey->offset);
4893                 return err;
4894         }
4895
4896         /* Check REG_EXTENT/PREALLOC_EXTENT */
4897         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4898         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4899         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4900         extent_offset = btrfs_file_extent_offset(node, fi);
4901         compressed = btrfs_file_extent_compression(node, fi);
4902         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4903
4904         /*
4905          * Check EXTENT_DATA csum
4906          *
4907          * For plain (uncompressed) extent, we should only check the range
4908          * we're referring to, as it's possible that part of prealloc extent
4909          * has been written, and has csum:
4910          *
4911          * |<--- Original large preallocated extent A ---->|
4912          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4913          *      No csum                         Has csum
4914          *
4915          * For compressed extent, we should check the whole range.
4916          */
4917         if (!compressed) {
4918                 search_start = disk_bytenr + extent_offset;
4919                 search_len = extent_num_bytes;
4920         } else {
4921                 search_start = disk_bytenr;
4922                 search_len = disk_num_bytes;
4923         }
4924         ret = count_csum_range(root, search_start, search_len, &csum_found);
4925         if (csum_found > 0 && nodatasum) {
4926                 err |= ODD_CSUM_ITEM;
4927                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4928                       root->objectid, fkey->objectid, fkey->offset);
4929         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4930                    !is_hole && (ret < 0 || csum_found < search_len)) {
4931                 err |= CSUM_ITEM_MISSING;
4932                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4933                       root->objectid, fkey->objectid, fkey->offset,
4934                       csum_found, search_len);
4935         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4936                 err |= ODD_CSUM_ITEM;
4937                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4938                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4939         }
4940
4941         /* Check EXTENT_DATA hole */
4942         if (!no_holes && *end != fkey->offset) {
4943                 err |= FILE_EXTENT_ERROR;
4944                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4945                       root->objectid, fkey->objectid, fkey->offset);
4946         }
4947
4948         *end += extent_num_bytes;
4949         if (!is_hole)
4950                 *size += extent_num_bytes;
4951
4952         return err;
4953 }
4954
4955 /*
4956  * Check INODE_ITEM and related ITEMs (the same inode number)
4957  * 1. check link count
4958  * 2. check inode ref/extref
4959  * 3. check dir item/index
4960  *
4961  * @ext_ref:    the EXTENDED_IREF feature
4962  *
4963  * Return 0 if no error occurred.
4964  * Return >0 for error or hit the traversal is done(by error bitmap)
4965  */
4966 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4967                             unsigned int ext_ref)
4968 {
4969         struct extent_buffer *node;
4970         struct btrfs_inode_item *ii;
4971         struct btrfs_key key;
4972         u64 inode_id;
4973         u32 mode;
4974         u64 nlink;
4975         u64 nbytes;
4976         u64 isize;
4977         u64 size = 0;
4978         u64 refs = 0;
4979         u64 extent_end = 0;
4980         u64 extent_size = 0;
4981         unsigned int dir;
4982         unsigned int nodatasum;
4983         int slot;
4984         int ret;
4985         int err = 0;
4986
4987         node = path->nodes[0];
4988         slot = path->slots[0];
4989
4990         btrfs_item_key_to_cpu(node, &key, slot);
4991         inode_id = key.objectid;
4992
4993         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4994                 ret = btrfs_next_item(root, path);
4995                 if (ret > 0)
4996                         err |= LAST_ITEM;
4997                 return err;
4998         }
4999
5000         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5001         isize = btrfs_inode_size(node, ii);
5002         nbytes = btrfs_inode_nbytes(node, ii);
5003         mode = btrfs_inode_mode(node, ii);
5004         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5005         nlink = btrfs_inode_nlink(node, ii);
5006         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5007
5008         while (1) {
5009                 ret = btrfs_next_item(root, path);
5010                 if (ret < 0) {
5011                         /* out will fill 'err' rusing current statistics */
5012                         goto out;
5013                 } else if (ret > 0) {
5014                         err |= LAST_ITEM;
5015                         goto out;
5016                 }
5017
5018                 node = path->nodes[0];
5019                 slot = path->slots[0];
5020                 btrfs_item_key_to_cpu(node, &key, slot);
5021                 if (key.objectid != inode_id)
5022                         goto out;
5023
5024                 switch (key.type) {
5025                 case BTRFS_INODE_REF_KEY:
5026                         ret = check_inode_ref(root, &key, node, slot, &refs,
5027                                               mode);
5028                         err |= ret;
5029                         break;
5030                 case BTRFS_INODE_EXTREF_KEY:
5031                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5032                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5033                                         root->objectid, key.objectid,
5034                                         key.offset);
5035                         ret = check_inode_extref(root, &key, node, slot, &refs,
5036                                                  mode);
5037                         err |= ret;
5038                         break;
5039                 case BTRFS_DIR_ITEM_KEY:
5040                 case BTRFS_DIR_INDEX_KEY:
5041                         if (!dir) {
5042                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5043                                         root->objectid, inode_id,
5044                                         imode_to_type(mode), key.objectid,
5045                                         key.offset);
5046                         }
5047                         ret = check_dir_item(root, &key, node, slot, &size,
5048                                              ext_ref);
5049                         err |= ret;
5050                         break;
5051                 case BTRFS_EXTENT_DATA_KEY:
5052                         if (dir) {
5053                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5054                                         root->objectid, inode_id, key.objectid,
5055                                         key.offset);
5056                         }
5057                         ret = check_file_extent(root, &key, node, slot,
5058                                                 nodatasum, &extent_size,
5059                                                 &extent_end);
5060                         err |= ret;
5061                         break;
5062                 case BTRFS_XATTR_ITEM_KEY:
5063                         break;
5064                 default:
5065                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5066                               key.objectid, key.type, key.offset);
5067                 }
5068         }
5069
5070 out:
5071         /* verify INODE_ITEM nlink/isize/nbytes */
5072         if (dir) {
5073                 if (nlink != 1) {
5074                         err |= LINK_COUNT_ERROR;
5075                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5076                               root->objectid, inode_id, nlink);
5077                 }
5078
5079                 /*
5080                  * Just a warning, as dir inode nbytes is just an
5081                  * instructive value.
5082                  */
5083                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5084                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5085                                 root->objectid, inode_id,
5086                                 root->fs_info->nodesize);
5087                 }
5088
5089                 if (isize != size) {
5090                         err |= ISIZE_ERROR;
5091                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5092                               root->objectid, inode_id, isize, size);
5093                 }
5094         } else {
5095                 if (nlink != refs) {
5096                         err |= LINK_COUNT_ERROR;
5097                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5098                               root->objectid, inode_id, nlink, refs);
5099                 } else if (!nlink) {
5100                         err |= ORPHAN_ITEM;
5101                 }
5102
5103                 if (!nbytes && !no_holes && extent_end < isize) {
5104                         err |= NBYTES_ERROR;
5105                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5106                               root->objectid, inode_id, isize);
5107                 }
5108
5109                 if (nbytes != extent_size) {
5110                         err |= NBYTES_ERROR;
5111                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5112                               root->objectid, inode_id, nbytes, extent_size);
5113                 }
5114         }
5115
5116         return err;
5117 }
5118
5119 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5120 {
5121         struct btrfs_path path;
5122         struct btrfs_key key;
5123         int err = 0;
5124         int ret;
5125
5126         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5127         key.type = BTRFS_INODE_ITEM_KEY;
5128         key.offset = 0;
5129
5130         /* For root being dropped, we don't need to check first inode */
5131         if (btrfs_root_refs(&root->root_item) == 0 &&
5132             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5133             key.objectid)
5134                 return 0;
5135
5136         btrfs_init_path(&path);
5137
5138         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5139         if (ret < 0)
5140                 goto out;
5141         if (ret > 0) {
5142                 ret = 0;
5143                 err |= INODE_ITEM_MISSING;
5144                 error("first inode item of root %llu is missing",
5145                       root->objectid);
5146         }
5147
5148         err |= check_inode_item(root, &path, ext_ref);
5149         err &= ~LAST_ITEM;
5150         if (err && !ret)
5151                 ret = -EIO;
5152 out:
5153         btrfs_release_path(&path);
5154         return ret;
5155 }
5156
5157 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5158                                                 u64 parent, u64 root)
5159 {
5160         struct rb_node *node;
5161         struct tree_backref *back = NULL;
5162         struct tree_backref match = {
5163                 .node = {
5164                         .is_data = 0,
5165                 },
5166         };
5167
5168         if (parent) {
5169                 match.parent = parent;
5170                 match.node.full_backref = 1;
5171         } else {
5172                 match.root = root;
5173         }
5174
5175         node = rb_search(&rec->backref_tree, &match.node.node,
5176                          (rb_compare_keys)compare_extent_backref, NULL);
5177         if (node)
5178                 back = to_tree_backref(rb_node_to_extent_backref(node));
5179
5180         return back;
5181 }
5182
5183 static struct data_backref *find_data_backref(struct extent_record *rec,
5184                                                 u64 parent, u64 root,
5185                                                 u64 owner, u64 offset,
5186                                                 int found_ref,
5187                                                 u64 disk_bytenr, u64 bytes)
5188 {
5189         struct rb_node *node;
5190         struct data_backref *back = NULL;
5191         struct data_backref match = {
5192                 .node = {
5193                         .is_data = 1,
5194                 },
5195                 .owner = owner,
5196                 .offset = offset,
5197                 .bytes = bytes,
5198                 .found_ref = found_ref,
5199                 .disk_bytenr = disk_bytenr,
5200         };
5201
5202         if (parent) {
5203                 match.parent = parent;
5204                 match.node.full_backref = 1;
5205         } else {
5206                 match.root = root;
5207         }
5208
5209         node = rb_search(&rec->backref_tree, &match.node.node,
5210                          (rb_compare_keys)compare_extent_backref, NULL);
5211         if (node)
5212                 back = to_data_backref(rb_node_to_extent_backref(node));
5213
5214         return back;
5215 }
5216 /*
5217  * Iterate all item on the tree and call check_inode_item() to check.
5218  *
5219  * @root:       the root of the tree to be checked.
5220  * @ext_ref:    the EXTENDED_IREF feature
5221  *
5222  * Return 0 if no error found.
5223  * Return <0 for error.
5224  */
5225 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5226 {
5227         struct btrfs_path path;
5228         struct node_refs nrefs;
5229         struct btrfs_root_item *root_item = &root->root_item;
5230         int ret;
5231         int level;
5232         int err = 0;
5233
5234         /*
5235          * We need to manually check the first inode item(256)
5236          * As the following traversal function will only start from
5237          * the first inode item in the leaf, if inode item(256) is missing
5238          * we will just skip it forever.
5239          */
5240         ret = check_fs_first_inode(root, ext_ref);
5241         if (ret < 0)
5242                 return ret;
5243
5244         memset(&nrefs, 0, sizeof(nrefs));
5245         level = btrfs_header_level(root->node);
5246         btrfs_init_path(&path);
5247
5248         if (btrfs_root_refs(root_item) > 0 ||
5249             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5250                 path.nodes[level] = root->node;
5251                 path.slots[level] = 0;
5252                 extent_buffer_get(root->node);
5253         } else {
5254                 struct btrfs_key key;
5255
5256                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5257                 level = root_item->drop_level;
5258                 path.lowest_level = level;
5259                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5260                 if (ret < 0)
5261                         goto out;
5262                 ret = 0;
5263         }
5264
5265         while (1) {
5266                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5267                 err |= !!ret;
5268
5269                 /* if ret is negative, walk shall stop */
5270                 if (ret < 0) {
5271                         ret = err;
5272                         break;
5273                 }
5274
5275                 ret = walk_up_tree_v2(root, &path, &level);
5276                 if (ret != 0) {
5277                         /* Normal exit, reset ret to err */
5278                         ret = err;
5279                         break;
5280                 }
5281         }
5282
5283 out:
5284         btrfs_release_path(&path);
5285         return ret;
5286 }
5287
5288 /*
5289  * Find the relative ref for root_ref and root_backref.
5290  *
5291  * @root:       the root of the root tree.
5292  * @ref_key:    the key of the root ref.
5293  *
5294  * Return 0 if no error occurred.
5295  */
5296 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5297                           struct extent_buffer *node, int slot)
5298 {
5299         struct btrfs_path path;
5300         struct btrfs_key key;
5301         struct btrfs_root_ref *ref;
5302         struct btrfs_root_ref *backref;
5303         char ref_name[BTRFS_NAME_LEN] = {0};
5304         char backref_name[BTRFS_NAME_LEN] = {0};
5305         u64 ref_dirid;
5306         u64 ref_seq;
5307         u32 ref_namelen;
5308         u64 backref_dirid;
5309         u64 backref_seq;
5310         u32 backref_namelen;
5311         u32 len;
5312         int ret;
5313         int err = 0;
5314
5315         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5316         ref_dirid = btrfs_root_ref_dirid(node, ref);
5317         ref_seq = btrfs_root_ref_sequence(node, ref);
5318         ref_namelen = btrfs_root_ref_name_len(node, ref);
5319
5320         if (ref_namelen <= BTRFS_NAME_LEN) {
5321                 len = ref_namelen;
5322         } else {
5323                 len = BTRFS_NAME_LEN;
5324                 warning("%s[%llu %llu] ref_name too long",
5325                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5326                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5327                         ref_key->offset);
5328         }
5329         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5330
5331         /* Find relative root_ref */
5332         key.objectid = ref_key->offset;
5333         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5334         key.offset = ref_key->objectid;
5335
5336         btrfs_init_path(&path);
5337         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5338         if (ret) {
5339                 err |= ROOT_REF_MISSING;
5340                 error("%s[%llu %llu] couldn't find relative ref",
5341                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5342                       "ROOT_REF" : "ROOT_BACKREF",
5343                       ref_key->objectid, ref_key->offset);
5344                 goto out;
5345         }
5346
5347         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5348                                  struct btrfs_root_ref);
5349         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5350         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5351         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5352
5353         if (backref_namelen <= BTRFS_NAME_LEN) {
5354                 len = backref_namelen;
5355         } else {
5356                 len = BTRFS_NAME_LEN;
5357                 warning("%s[%llu %llu] ref_name too long",
5358                         key.type == BTRFS_ROOT_REF_KEY ?
5359                         "ROOT_REF" : "ROOT_BACKREF",
5360                         key.objectid, key.offset);
5361         }
5362         read_extent_buffer(path.nodes[0], backref_name,
5363                            (unsigned long)(backref + 1), len);
5364
5365         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5366             ref_namelen != backref_namelen ||
5367             strncmp(ref_name, backref_name, len)) {
5368                 err |= ROOT_REF_MISMATCH;
5369                 error("%s[%llu %llu] mismatch relative ref",
5370                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5371                       "ROOT_REF" : "ROOT_BACKREF",
5372                       ref_key->objectid, ref_key->offset);
5373         }
5374 out:
5375         btrfs_release_path(&path);
5376         return err;
5377 }
5378
5379 /*
5380  * Check all fs/file tree in low_memory mode.
5381  *
5382  * 1. for fs tree root item, call check_fs_root_v2()
5383  * 2. for fs tree root ref/backref, call check_root_ref()
5384  *
5385  * Return 0 if no error occurred.
5386  */
5387 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5388 {
5389         struct btrfs_root *tree_root = fs_info->tree_root;
5390         struct btrfs_root *cur_root = NULL;
5391         struct btrfs_path path;
5392         struct btrfs_key key;
5393         struct extent_buffer *node;
5394         unsigned int ext_ref;
5395         int slot;
5396         int ret;
5397         int err = 0;
5398
5399         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5400
5401         btrfs_init_path(&path);
5402         key.objectid = BTRFS_FS_TREE_OBJECTID;
5403         key.offset = 0;
5404         key.type = BTRFS_ROOT_ITEM_KEY;
5405
5406         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5407         if (ret < 0) {
5408                 err = ret;
5409                 goto out;
5410         } else if (ret > 0) {
5411                 err = -ENOENT;
5412                 goto out;
5413         }
5414
5415         while (1) {
5416                 node = path.nodes[0];
5417                 slot = path.slots[0];
5418                 btrfs_item_key_to_cpu(node, &key, slot);
5419                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5420                         goto out;
5421                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5422                     fs_root_objectid(key.objectid)) {
5423                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5424                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5425                                                                        &key);
5426                         } else {
5427                                 key.offset = (u64)-1;
5428                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5429                         }
5430
5431                         if (IS_ERR(cur_root)) {
5432                                 error("Fail to read fs/subvol tree: %lld",
5433                                       key.objectid);
5434                                 err = -EIO;
5435                                 goto next;
5436                         }
5437
5438                         ret = check_fs_root_v2(cur_root, ext_ref);
5439                         err |= ret;
5440
5441                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5442                                 btrfs_free_fs_root(cur_root);
5443                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5444                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5445                         ret = check_root_ref(tree_root, &key, node, slot);
5446                         err |= ret;
5447                 }
5448 next:
5449                 ret = btrfs_next_item(tree_root, &path);
5450                 if (ret > 0)
5451                         goto out;
5452                 if (ret < 0) {
5453                         err = ret;
5454                         goto out;
5455                 }
5456         }
5457
5458 out:
5459         btrfs_release_path(&path);
5460         return err;
5461 }
5462
5463 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5464                           struct cache_tree *root_cache)
5465 {
5466         int ret;
5467
5468         if (!ctx.progress_enabled)
5469                 fprintf(stderr, "checking fs roots\n");
5470         if (check_mode == CHECK_MODE_LOWMEM)
5471                 ret = check_fs_roots_v2(fs_info);
5472         else
5473                 ret = check_fs_roots(fs_info, root_cache);
5474
5475         return ret;
5476 }
5477
5478 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5479 {
5480         struct extent_backref *back, *tmp;
5481         struct tree_backref *tback;
5482         struct data_backref *dback;
5483         u64 found = 0;
5484         int err = 0;
5485
5486         rbtree_postorder_for_each_entry_safe(back, tmp,
5487                                              &rec->backref_tree, node) {
5488                 if (!back->found_extent_tree) {
5489                         err = 1;
5490                         if (!print_errs)
5491                                 goto out;
5492                         if (back->is_data) {
5493                                 dback = to_data_backref(back);
5494                                 fprintf(stderr, "Data backref %llu %s %llu"
5495                                         " owner %llu offset %llu num_refs %lu"
5496                                         " not found in extent tree\n",
5497                                         (unsigned long long)rec->start,
5498                                         back->full_backref ?
5499                                         "parent" : "root",
5500                                         back->full_backref ?
5501                                         (unsigned long long)dback->parent:
5502                                         (unsigned long long)dback->root,
5503                                         (unsigned long long)dback->owner,
5504                                         (unsigned long long)dback->offset,
5505                                         (unsigned long)dback->num_refs);
5506                         } else {
5507                                 tback = to_tree_backref(back);
5508                                 fprintf(stderr, "Tree backref %llu parent %llu"
5509                                         " root %llu not found in extent tree\n",
5510                                         (unsigned long long)rec->start,
5511                                         (unsigned long long)tback->parent,
5512                                         (unsigned long long)tback->root);
5513                         }
5514                 }
5515                 if (!back->is_data && !back->found_ref) {
5516                         err = 1;
5517                         if (!print_errs)
5518                                 goto out;
5519                         tback = to_tree_backref(back);
5520                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5521                                 (unsigned long long)rec->start,
5522                                 back->full_backref ? "parent" : "root",
5523                                 back->full_backref ?
5524                                 (unsigned long long)tback->parent :
5525                                 (unsigned long long)tback->root, back);
5526                 }
5527                 if (back->is_data) {
5528                         dback = to_data_backref(back);
5529                         if (dback->found_ref != dback->num_refs) {
5530                                 err = 1;
5531                                 if (!print_errs)
5532                                         goto out;
5533                                 fprintf(stderr, "Incorrect local backref count"
5534                                         " on %llu %s %llu owner %llu"
5535                                         " offset %llu found %u wanted %u back %p\n",
5536                                         (unsigned long long)rec->start,
5537                                         back->full_backref ?
5538                                         "parent" : "root",
5539                                         back->full_backref ?
5540                                         (unsigned long long)dback->parent:
5541                                         (unsigned long long)dback->root,
5542                                         (unsigned long long)dback->owner,
5543                                         (unsigned long long)dback->offset,
5544                                         dback->found_ref, dback->num_refs, back);
5545                         }
5546                         if (dback->disk_bytenr != rec->start) {
5547                                 err = 1;
5548                                 if (!print_errs)
5549                                         goto out;
5550                                 fprintf(stderr, "Backref disk bytenr does not"
5551                                         " match extent record, bytenr=%llu, "
5552                                         "ref bytenr=%llu\n",
5553                                         (unsigned long long)rec->start,
5554                                         (unsigned long long)dback->disk_bytenr);
5555                         }
5556
5557                         if (dback->bytes != rec->nr) {
5558                                 err = 1;
5559                                 if (!print_errs)
5560                                         goto out;
5561                                 fprintf(stderr, "Backref bytes do not match "
5562                                         "extent backref, bytenr=%llu, ref "
5563                                         "bytes=%llu, backref bytes=%llu\n",
5564                                         (unsigned long long)rec->start,
5565                                         (unsigned long long)rec->nr,
5566                                         (unsigned long long)dback->bytes);
5567                         }
5568                 }
5569                 if (!back->is_data) {
5570                         found += 1;
5571                 } else {
5572                         dback = to_data_backref(back);
5573                         found += dback->found_ref;
5574                 }
5575         }
5576         if (found != rec->refs) {
5577                 err = 1;
5578                 if (!print_errs)
5579                         goto out;
5580                 fprintf(stderr, "Incorrect global backref count "
5581                         "on %llu found %llu wanted %llu\n",
5582                         (unsigned long long)rec->start,
5583                         (unsigned long long)found,
5584                         (unsigned long long)rec->refs);
5585         }
5586 out:
5587         return err;
5588 }
5589
/*
 * Node destructor passed to rb_free_nodes(): frees the extent_backref that
 * @node maps to via rb_node_to_extent_backref().
 */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
5596
5597 static void free_all_extent_backrefs(struct extent_record *rec)
5598 {
5599         rb_free_nodes(&rec->backref_tree, __free_one_backref);
5600 }
5601
5602 static void free_extent_record_cache(struct cache_tree *extent_cache)
5603 {
5604         struct cache_extent *cache;
5605         struct extent_record *rec;
5606
5607         while (1) {
5608                 cache = first_cache_extent(extent_cache);
5609                 if (!cache)
5610                         break;
5611                 rec = container_of(cache, struct extent_record, cache);
5612                 remove_cache_extent(extent_cache, cache);
5613                 free_all_extent_backrefs(rec);
5614                 free(rec);
5615         }
5616 }
5617
5618 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5619                                  struct extent_record *rec)
5620 {
5621         if (rec->content_checked && rec->owner_ref_checked &&
5622             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5623             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5624             !rec->bad_full_backref && !rec->crossing_stripes &&
5625             !rec->wrong_chunk_type) {
5626                 remove_cache_extent(extent_cache, &rec->cache);
5627                 free_all_extent_backrefs(rec);
5628                 list_del_init(&rec->list);
5629                 free(rec);
5630         }
5631         return 0;
5632 }
5633
5634 static int check_owner_ref(struct btrfs_root *root,
5635                             struct extent_record *rec,
5636                             struct extent_buffer *buf)
5637 {
5638         struct extent_backref *node, *tmp;
5639         struct tree_backref *back;
5640         struct btrfs_root *ref_root;
5641         struct btrfs_key key;
5642         struct btrfs_path path;
5643         struct extent_buffer *parent;
5644         int level;
5645         int found = 0;
5646         int ret;
5647
5648         rbtree_postorder_for_each_entry_safe(node, tmp,
5649                                              &rec->backref_tree, node) {
5650                 if (node->is_data)
5651                         continue;
5652                 if (!node->found_ref)
5653                         continue;
5654                 if (node->full_backref)
5655                         continue;
5656                 back = to_tree_backref(node);
5657                 if (btrfs_header_owner(buf) == back->root)
5658                         return 0;
5659         }
5660         BUG_ON(rec->is_root);
5661
5662         /* try to find the block by search corresponding fs tree */
5663         key.objectid = btrfs_header_owner(buf);
5664         key.type = BTRFS_ROOT_ITEM_KEY;
5665         key.offset = (u64)-1;
5666
5667         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5668         if (IS_ERR(ref_root))
5669                 return 1;
5670
5671         level = btrfs_header_level(buf);
5672         if (level == 0)
5673                 btrfs_item_key_to_cpu(buf, &key, 0);
5674         else
5675                 btrfs_node_key_to_cpu(buf, &key, 0);
5676
5677         btrfs_init_path(&path);
5678         path.lowest_level = level + 1;
5679         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5680         if (ret < 0)
5681                 return 0;
5682
5683         parent = path.nodes[level + 1];
5684         if (parent && buf->start == btrfs_node_blockptr(parent,
5685                                                         path.slots[level + 1]))
5686                 found = 1;
5687
5688         btrfs_release_path(&path);
5689         return found ? 0 : 1;
5690 }
5691
5692 static int is_extent_tree_record(struct extent_record *rec)
5693 {
5694         struct extent_backref *node, *tmp;
5695         struct tree_backref *back;
5696         int is_extent = 0;
5697
5698         rbtree_postorder_for_each_entry_safe(node, tmp,
5699                                              &rec->backref_tree, node) {
5700                 if (node->is_data)
5701                         return 0;
5702                 back = to_tree_backref(node);
5703                 if (node->full_backref)
5704                         return 0;
5705                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5706                         is_extent = 1;
5707         }
5708         return is_extent;
5709 }
5710
5711
5712 static int record_bad_block_io(struct btrfs_fs_info *info,
5713                                struct cache_tree *extent_cache,
5714                                u64 start, u64 len)
5715 {
5716         struct extent_record *rec;
5717         struct cache_extent *cache;
5718         struct btrfs_key key;
5719
5720         cache = lookup_cache_extent(extent_cache, start, len);
5721         if (!cache)
5722                 return 0;
5723
5724         rec = container_of(cache, struct extent_record, cache);
5725         if (!is_extent_tree_record(rec))
5726                 return 0;
5727
5728         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5729         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5730 }
5731
/*
 * Exchange the entries at @slot and @slot + 1 of the tree block @buf.
 *
 * For a node (header level != 0) the two key pointers are swapped as whole
 * structures.  For a leaf, the data of both items is copied out and written
 * back at the other item's offset, the offset/size fields of the two item
 * headers are exchanged, and the two keys are exchanged through
 * btrfs_set_item_key_unsafe().
 *
 * Return 0 on success, -ENOMEM if a temporary item buffer cannot be
 * allocated (leaf case only).
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* Read both key pointers, then write them back crosswise */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        /*
                         * The first key of the node changed, so hand the new
                         * one to btrfs_fixup_low_keys() for the level above.
                         */
                        struct btrfs_disk_key key;
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                /* Snapshot keys, offsets and sizes before touching anything */
                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): each write below uses the *other* item's
                 * size as length, i.e. item2_size bytes are copied out of a
                 * buffer allocated with item1_size and vice versa.  This
                 * looks safe only when both items have the same size --
                 * confirm before relying on it for differently sized items.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /* Exchange the offset/size fields of the two item headers */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /*
                 * Exchange the keys; path->slots[0] selects which item
                 * btrfs_set_item_key_unsafe() updates and is left pointing
                 * at slot + 1 afterwards.
                 */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
5799
5800 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5801 {
5802         struct extent_buffer *buf;
5803         struct btrfs_key k1, k2;
5804         int i;
5805         int level = path->lowest_level;
5806         int ret = -EIO;
5807
5808         buf = path->nodes[level];
5809         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5810                 if (level) {
5811                         btrfs_node_key_to_cpu(buf, &k1, i);
5812                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5813                 } else {
5814                         btrfs_item_key_to_cpu(buf, &k1, i);
5815                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5816                 }
5817                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5818                         continue;
5819                 ret = swap_values(root, path, buf, i);
5820                 if (ret)
5821                         break;
5822                 btrfs_mark_buffer_dirty(buf);
5823                 i = 0;
5824         }
5825         return ret;
5826 }
5827
5828 static int delete_bogus_item(struct btrfs_root *root,
5829                              struct btrfs_path *path,
5830                              struct extent_buffer *buf, int slot)
5831 {
5832         struct btrfs_key key;
5833         int nritems = btrfs_header_nritems(buf);
5834
5835         btrfs_item_key_to_cpu(buf, &key, slot);
5836
5837         /* These are all the keys we can deal with missing. */
5838         if (key.type != BTRFS_DIR_INDEX_KEY &&
5839             key.type != BTRFS_EXTENT_ITEM_KEY &&
5840             key.type != BTRFS_METADATA_ITEM_KEY &&
5841             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5842             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5843                 return -1;
5844
5845         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5846                (unsigned long long)key.objectid, key.type,
5847                (unsigned long long)key.offset, slot, buf->start);
5848         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5849                               btrfs_item_nr_offset(slot + 1),
5850                               sizeof(struct btrfs_item) *
5851                               (nritems - slot - 1));
5852         btrfs_set_header_nritems(buf, nritems - 1);
5853         if (slot == 0) {
5854                 struct btrfs_disk_key disk_key;
5855
5856                 btrfs_item_key(buf, &disk_key, 0);
5857                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5858         }
5859         btrfs_mark_buffer_dirty(buf);
5860         return 0;
5861 }
5862
5863 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5864 {
5865         struct extent_buffer *buf;
5866         int i;
5867         int ret = 0;
5868
5869         /* We should only get this for leaves */
5870         BUG_ON(path->lowest_level);
5871         buf = path->nodes[0];
5872 again:
5873         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5874                 unsigned int shift = 0, offset;
5875
5876                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5877                     BTRFS_LEAF_DATA_SIZE(root)) {
5878                         if (btrfs_item_end_nr(buf, i) >
5879                             BTRFS_LEAF_DATA_SIZE(root)) {
5880                                 ret = delete_bogus_item(root, path, buf, i);
5881                                 if (!ret)
5882                                         goto again;
5883                                 fprintf(stderr, "item is off the end of the "
5884                                         "leaf, can't fix\n");
5885                                 ret = -EIO;
5886                                 break;
5887                         }
5888                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5889                                 btrfs_item_end_nr(buf, i);
5890                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5891                            btrfs_item_offset_nr(buf, i - 1)) {
5892                         if (btrfs_item_end_nr(buf, i) >
5893                             btrfs_item_offset_nr(buf, i - 1)) {
5894                                 ret = delete_bogus_item(root, path, buf, i);
5895                                 if (!ret)
5896                                         goto again;
5897                                 fprintf(stderr, "items overlap, can't fix\n");
5898                                 ret = -EIO;
5899                                 break;
5900                         }
5901                         shift = btrfs_item_offset_nr(buf, i - 1) -
5902                                 btrfs_item_end_nr(buf, i);
5903                 }
5904                 if (!shift)
5905                         continue;
5906
5907                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5908                        i, shift, (unsigned long long)buf->start);
5909                 offset = btrfs_item_offset_nr(buf, i);
5910                 memmove_extent_buffer(buf,
5911                                       btrfs_leaf_data(buf) + offset + shift,
5912                                       btrfs_leaf_data(buf) + offset,
5913                                       btrfs_item_size_nr(buf, i));
5914                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5915                                       offset + shift);
5916                 btrfs_mark_buffer_dirty(buf);
5917         }
5918
5919         /*
5920          * We may have moved things, in which case we want to exit so we don't
5921          * write those changes out.  Once we have proper abort functionality in
5922          * progs this can be changed to something nicer.
5923          */
5924         BUG_ON(ret);
5925         return ret;
5926 }
5927
5928 /*
5929  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5930  * then just return -EIO.
5931  */
5932 static int try_to_fix_bad_block(struct btrfs_root *root,
5933                                 struct extent_buffer *buf,
5934                                 enum btrfs_tree_block_status status)
5935 {
5936         struct btrfs_trans_handle *trans;
5937         struct ulist *roots;
5938         struct ulist_node *node;
5939         struct btrfs_root *search_root;
5940         struct btrfs_path path;
5941         struct ulist_iterator iter;
5942         struct btrfs_key root_key, key;
5943         int ret;
5944
5945         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5946             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5947                 return -EIO;
5948
5949         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5950         if (ret)
5951                 return -EIO;
5952
5953         btrfs_init_path(&path);
5954         ULIST_ITER_INIT(&iter);
5955         while ((node = ulist_next(roots, &iter))) {
5956                 root_key.objectid = node->val;
5957                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5958                 root_key.offset = (u64)-1;
5959
5960                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5961                 if (IS_ERR(root)) {
5962                         ret = -EIO;
5963                         break;
5964                 }
5965
5966
5967                 trans = btrfs_start_transaction(search_root, 0);
5968                 if (IS_ERR(trans)) {
5969                         ret = PTR_ERR(trans);
5970                         break;
5971                 }
5972
5973                 path.lowest_level = btrfs_header_level(buf);
5974                 path.skip_check_block = 1;
5975                 if (path.lowest_level)
5976                         btrfs_node_key_to_cpu(buf, &key, 0);
5977                 else
5978                         btrfs_item_key_to_cpu(buf, &key, 0);
5979                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5980                 if (ret) {
5981                         ret = -EIO;
5982                         btrfs_commit_transaction(trans, search_root);
5983                         break;
5984                 }
5985                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5986                         ret = fix_key_order(search_root, &path);
5987                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5988                         ret = fix_item_offset(search_root, &path);
5989                 if (ret) {
5990                         btrfs_commit_transaction(trans, search_root);
5991                         break;
5992                 }
5993                 btrfs_release_path(&path);
5994                 btrfs_commit_transaction(trans, search_root);
5995         }
5996         ulist_free(roots);
5997         btrfs_release_path(&path);
5998         return ret;
5999 }
6000
6001 static int check_block(struct btrfs_root *root,
6002                        struct cache_tree *extent_cache,
6003                        struct extent_buffer *buf, u64 flags)
6004 {
6005         struct extent_record *rec;
6006         struct cache_extent *cache;
6007         struct btrfs_key key;
6008         enum btrfs_tree_block_status status;
6009         int ret = 0;
6010         int level;
6011
6012         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6013         if (!cache)
6014                 return 1;
6015         rec = container_of(cache, struct extent_record, cache);
6016         rec->generation = btrfs_header_generation(buf);
6017
6018         level = btrfs_header_level(buf);
6019         if (btrfs_header_nritems(buf) > 0) {
6020
6021                 if (level == 0)
6022                         btrfs_item_key_to_cpu(buf, &key, 0);
6023                 else
6024                         btrfs_node_key_to_cpu(buf, &key, 0);
6025
6026                 rec->info_objectid = key.objectid;
6027         }
6028         rec->info_level = level;
6029
6030         if (btrfs_is_leaf(buf))
6031                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6032         else
6033                 status = btrfs_check_node(root, &rec->parent_key, buf);
6034
6035         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6036                 if (repair)
6037                         status = try_to_fix_bad_block(root, buf, status);
6038                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6039                         ret = -EIO;
6040                         fprintf(stderr, "bad block %llu\n",
6041                                 (unsigned long long)buf->start);
6042                 } else {
6043                         /*
6044                          * Signal to callers we need to start the scan over
6045                          * again since we'll have cowed blocks.
6046                          */
6047                         ret = -EAGAIN;
6048                 }
6049         } else {
6050                 rec->content_checked = 1;
6051                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6052                         rec->owner_ref_checked = 1;
6053                 else {
6054                         ret = check_owner_ref(root, rec, buf);
6055                         if (!ret)
6056                                 rec->owner_ref_checked = 1;
6057                 }
6058         }
6059         if (!ret)
6060                 maybe_free_extent_rec(extent_cache, rec);
6061         return ret;
6062 }
6063
/*
 * Disabled code (compiled out by "#if 0"): list based lookup of a tree
 * backref of @rec.  With a non-zero @parent it matches a full backref by
 * parent, otherwise a non-full backref by @root; NULL if nothing matches.
 *
 * NOTE(review): this walks rec->backrefs as a list, while the enabled code
 * in this file iterates rec->backref_tree -- presumably kept for reference
 * only; confirm before re-enabling.
 */
#if 0
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct tree_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                cur = cur->next;
                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root)
                                return back;
                }
        }
        return NULL;
}
#endif
6093
6094 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6095                                                 u64 parent, u64 root)
6096 {
6097         struct tree_backref *ref = malloc(sizeof(*ref));
6098
6099         if (!ref)
6100                 return NULL;
6101         memset(&ref->node, 0, sizeof(ref->node));
6102         if (parent > 0) {
6103                 ref->parent = parent;
6104                 ref->node.full_backref = 1;
6105         } else {
6106                 ref->root = root;
6107                 ref->node.full_backref = 0;
6108         }
6109
6110         return ref;
6111 }
6112
/*
 * Disabled code (compiled out by "#if 0"): list based lookup of a data
 * backref of @rec.  With a non-zero @parent it matches a full backref by
 * parent; otherwise it matches a non-full backref by @root, @owner and
 * @offset, skipping an already found entry whose bytes or disk_bytenr
 * differ when @found_ref is set.  NULL if nothing matches.
 *
 * NOTE(review): this walks rec->backrefs as a list, while the enabled code
 * in this file iterates rec->backref_tree -- presumably kept for reference
 * only; confirm before re-enabling.
 */
#if 0
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct data_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                cur = cur->next;
                if (!node->is_data)
                        continue;
                back = to_data_backref(node);
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root && back->owner == owner &&
                            back->offset == offset) {
                                if (found_ref && node->found_ref &&
                                    (back->bytes != bytes ||
                                    back->disk_bytenr != disk_bytenr))
                                        continue;
                                return back;
                        }
                }
        }
        return NULL;
}
#endif
6151
6152 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6153                                                 u64 parent, u64 root,
6154                                                 u64 owner, u64 offset,
6155                                                 u64 max_size)
6156 {
6157         struct data_backref *ref = malloc(sizeof(*ref));
6158
6159         if (!ref)
6160                 return NULL;
6161         memset(&ref->node, 0, sizeof(ref->node));
6162         ref->node.is_data = 1;
6163
6164         if (parent > 0) {
6165                 ref->parent = parent;
6166                 ref->owner = 0;
6167                 ref->offset = 0;
6168                 ref->node.full_backref = 1;
6169         } else {
6170                 ref->root = root;
6171                 ref->owner = owner;
6172                 ref->offset = offset;
6173                 ref->node.full_backref = 0;
6174         }
6175         ref->bytes = max_size;
6176         ref->found_ref = 0;
6177         ref->num_refs = 0;
6178         if (max_size > rec->max_size)
6179                 rec->max_size = max_size;
6180         return ref;
6181 }
6182
6183 /* Check if the type of extent matches with its chunk */
6184 static void check_extent_type(struct extent_record *rec)
6185 {
6186         struct btrfs_block_group_cache *bg_cache;
6187
6188         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6189         if (!bg_cache)
6190                 return;
6191
6192         /* data extent, check chunk directly*/
6193         if (!rec->metadata) {
6194                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6195                         rec->wrong_chunk_type = 1;
6196                 return;
6197         }
6198
6199         /* metadata extent, check the obvious case first */
6200         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6201                                  BTRFS_BLOCK_GROUP_METADATA))) {
6202                 rec->wrong_chunk_type = 1;
6203                 return;
6204         }
6205
6206         /*
6207          * Check SYSTEM extent, as it's also marked as metadata, we can only
6208          * make sure it's a SYSTEM extent by its backref
6209          */
6210         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6211                 struct extent_backref *node;
6212                 struct tree_backref *tback;
6213                 u64 bg_type;
6214
6215                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6216                 if (node->is_data) {
6217                         /* tree block shouldn't have data backref */
6218                         rec->wrong_chunk_type = 1;
6219                         return;
6220                 }
6221                 tback = container_of(node, struct tree_backref, node);
6222
6223                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6224                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6225                 else
6226                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6227                 if (!(bg_cache->flags & bg_type))
6228                         rec->wrong_chunk_type = 1;
6229         }
6230 }
6231
6232 /*
6233  * Allocate a new extent record, fill default values from @tmpl and insert int
6234  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6235  * the cache, otherwise it fails.
6236  */
6237 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6238                 struct extent_record *tmpl)
6239 {
6240         struct extent_record *rec;
6241         int ret = 0;
6242
6243         BUG_ON(tmpl->max_size == 0);
6244         rec = malloc(sizeof(*rec));
6245         if (!rec)
6246                 return -ENOMEM;
6247         rec->start = tmpl->start;
6248         rec->max_size = tmpl->max_size;
6249         rec->nr = max(tmpl->nr, tmpl->max_size);
6250         rec->found_rec = tmpl->found_rec;
6251         rec->content_checked = tmpl->content_checked;
6252         rec->owner_ref_checked = tmpl->owner_ref_checked;
6253         rec->num_duplicates = 0;
6254         rec->metadata = tmpl->metadata;
6255         rec->flag_block_full_backref = FLAG_UNSET;
6256         rec->bad_full_backref = 0;
6257         rec->crossing_stripes = 0;
6258         rec->wrong_chunk_type = 0;
6259         rec->is_root = tmpl->is_root;
6260         rec->refs = tmpl->refs;
6261         rec->extent_item_refs = tmpl->extent_item_refs;
6262         rec->parent_generation = tmpl->parent_generation;
6263         INIT_LIST_HEAD(&rec->backrefs);
6264         INIT_LIST_HEAD(&rec->dups);
6265         INIT_LIST_HEAD(&rec->list);
6266         rec->backref_tree = RB_ROOT;
6267         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6268         rec->cache.start = tmpl->start;
6269         rec->cache.size = tmpl->nr;
6270         ret = insert_cache_extent(extent_cache, &rec->cache);
6271         if (ret) {
6272                 free(rec);
6273                 return ret;
6274         }
6275         bytes_used += rec->nr;
6276
6277         if (tmpl->metadata)
6278                 rec->crossing_stripes = check_crossing_stripes(global_info,
6279                                 rec->start, global_info->nodesize);
6280         check_extent_type(rec);
6281         return ret;
6282 }
6283
/*
 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
 * some are hints:
 * - refs              - if found, increase refs
 * - is_root           - if found, set
 * - content_checked   - if found, set
 * - owner_ref_checked - if found, set
 *
 * If not found, create a new one, initialize and insert.
 *
 * Returns 0 when an existing record was updated, -ENOMEM when the record
 * describing a duplicate cannot be allocated, and otherwise whatever
 * add_extent_rec_nolookup() returns for the newly created record.
 */
static int add_extent_rec(struct cache_tree *extent_cache,
                struct extent_record *tmpl)
{
        struct extent_record *rec;
        struct cache_extent *cache;
        int ret = 0;
        int dup = 0;

        cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
        if (cache) {
                rec = container_of(cache, struct extent_record, cache);
                /* Any non-zero tmpl->refs bumps the count by exactly one */
                if (tmpl->refs)
                        rec->refs++;
                /*
                 * nr == 1 is what the placeholder record created by
                 * add_tree_backref() ends up with; replace it with the
                 * size the template knows about.
                 */
                if (rec->nr == 1)
                        rec->nr = max(tmpl->nr, tmpl->max_size);

                /*
                 * We need to make sure to reset nr to whatever the extent
                 * record says was the real size, this way we can compare it to
                 * the backrefs.
                 */
                if (tmpl->found_rec) {
                        /*
                         * A second extent item for this range, or one that
                         * starts elsewhere inside it, is a duplicate.
                         */
                        if (tmpl->start != rec->start || rec->found_rec) {
                                struct extent_record *tmp;

                                dup = 1;
                                /* Queue rec on duplicate_extents only once */
                                if (list_empty(&rec->list))
                                        list_add_tail(&rec->list,
                                                      &duplicate_extents);

                                /*
                                 * We have to do this song and dance in case we
                                 * find an extent record that falls inside of
                                 * our current extent record but does not have
                                 * the same objectid.
                                 */
                                tmp = malloc(sizeof(*tmp));
                                if (!tmp)
                                        return -ENOMEM;
                                /*
                                 * Only the fields below are initialised; the
                                 * rest of tmp is left as malloc() returned it.
                                 */
                                tmp->start = tmpl->start;
                                tmp->max_size = tmpl->max_size;
                                tmp->nr = tmpl->nr;
                                tmp->found_rec = 1;
                                tmp->metadata = tmpl->metadata;
                                tmp->extent_item_refs = tmpl->extent_item_refs;
                                INIT_LIST_HEAD(&tmp->list);
                                list_add_tail(&tmp->list, &rec->dups);
                                rec->num_duplicates++;
                        } else {
                                rec->nr = tmpl->nr;
                                rec->found_rec = 1;
                        }
                }

                /*
                 * Take over the extent item ref count unless this was a
                 * duplicate; warn when a count had already been recorded.
                 */
                if (tmpl->extent_item_refs && !dup) {
                        if (rec->extent_item_refs) {
                                fprintf(stderr, "block %llu rec "
                                        "extent_item_refs %llu, passed %llu\n",
                                        (unsigned long long)tmpl->start,
                                        (unsigned long long)
                                                        rec->extent_item_refs,
                                        (unsigned long long)tmpl->extent_item_refs);
                        }
                        rec->extent_item_refs = tmpl->extent_item_refs;
                }
                /* The hint flags are sticky: they are set, never cleared */
                if (tmpl->is_root)
                        rec->is_root = 1;
                if (tmpl->content_checked)
                        rec->content_checked = 1;
                if (tmpl->owner_ref_checked)
                        rec->owner_ref_checked = 1;
                /* parent_key is overwritten unconditionally */
                memcpy(&rec->parent_key, &tmpl->parent_key,
                                sizeof(tmpl->parent_key));
                if (tmpl->parent_generation)
                        rec->parent_generation = tmpl->parent_generation;
                if (rec->max_size < tmpl->max_size)
                        rec->max_size = tmpl->max_size;

                /*
                 * A metadata extent can't cross stripe_len boundary, otherwise
                 * kernel scrub won't be able to handle it.
                 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
                 * it.
                 */
                if (tmpl->metadata)
                        rec->crossing_stripes = check_crossing_stripes(
                                        global_info, rec->start,
                                        global_info->nodesize);
                check_extent_type(rec);
                /*
                 * NOTE(review): presumably this may release rec once it is
                 * fully accounted for; rec is not touched afterwards.
                 */
                maybe_free_extent_rec(extent_cache, rec);
                return ret;
        }

        /* No overlapping record cached yet: create one from the template */
        ret = add_extent_rec_nolookup(extent_cache, tmpl);

        return ret;
}
6391
6392 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6393                             u64 parent, u64 root, int found_ref)
6394 {
6395         struct extent_record *rec;
6396         struct tree_backref *back;
6397         struct cache_extent *cache;
6398         int ret;
6399         bool insert = false;
6400
6401         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6402         if (!cache) {
6403                 struct extent_record tmpl;
6404
6405                 memset(&tmpl, 0, sizeof(tmpl));
6406                 tmpl.start = bytenr;
6407                 tmpl.nr = 1;
6408                 tmpl.metadata = 1;
6409                 tmpl.max_size = 1;
6410
6411                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6412                 if (ret)
6413                         return ret;
6414
6415                 /* really a bug in cache_extent implement now */
6416                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6417                 if (!cache)
6418                         return -ENOENT;
6419         }
6420
6421         rec = container_of(cache, struct extent_record, cache);
6422         if (rec->start != bytenr) {
6423                 /*
6424                  * Several cause, from unaligned bytenr to over lapping extents
6425                  */
6426                 return -EEXIST;
6427         }
6428
6429         back = find_tree_backref(rec, parent, root);
6430         if (!back) {
6431                 back = alloc_tree_backref(rec, parent, root);
6432                 if (!back)
6433                         return -ENOMEM;
6434                 insert = true;
6435         }
6436
6437         if (found_ref) {
6438                 if (back->node.found_ref) {
6439                         fprintf(stderr, "Extent back ref already exists "
6440                                 "for %llu parent %llu root %llu \n",
6441                                 (unsigned long long)bytenr,
6442                                 (unsigned long long)parent,
6443                                 (unsigned long long)root);
6444                 }
6445                 back->node.found_ref = 1;
6446         } else {
6447                 if (back->node.found_extent_tree) {
6448                         fprintf(stderr, "Extent back ref already exists "
6449                                 "for %llu parent %llu root %llu \n",
6450                                 (unsigned long long)bytenr,
6451                                 (unsigned long long)parent,
6452                                 (unsigned long long)root);
6453                 }
6454                 back->node.found_extent_tree = 1;
6455         }
6456         if (insert)
6457                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6458                         compare_extent_backref));
6459         check_extent_type(rec);
6460         maybe_free_extent_rec(extent_cache, rec);
6461         return 0;
6462 }
6463
/*
 * Record a data backref for the extent at @bytenr.
 *
 * @parent/@root/@owner/@offset identify the backref (see
 * alloc_data_backref()).  With @found_ref set the reference comes with the
 * real extent size in @max_size and @num_refs must be 1; otherwise
 * @num_refs is stored on the backref and it is marked as found in the
 * extent tree.
 *
 * Returns 0 on success or the error from add_extent_rec_nolookup() when a
 * placeholder record cannot be created.  Allocation failure of the backref
 * and a failed re-lookup are fatal (BUG_ON()/abort()).
 */
static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
                            u64 parent, u64 root, u64 owner, u64 offset,
                            u32 num_refs, int found_ref, u64 max_size)
{
        struct extent_record *rec;
        struct data_backref *back;
        struct cache_extent *cache;
        int ret;
        bool insert = false;

        cache = lookup_cache_extent(extent_cache, bytenr, 1);
        if (!cache) {
                struct extent_record tmpl;

                /* No record yet: insert a placeholder sized by @max_size */
                memset(&tmpl, 0, sizeof(tmpl));
                tmpl.start = bytenr;
                tmpl.nr = 1;
                tmpl.max_size = max_size;

                ret = add_extent_rec_nolookup(extent_cache, &tmpl);
                if (ret)
                        return ret;

                /* The record was just inserted, so a miss here is fatal */
                cache = lookup_cache_extent(extent_cache, bytenr, 1);
                if (!cache)
                        abort();
        }

        rec = container_of(cache, struct extent_record, cache);
        if (rec->max_size < max_size)
                rec->max_size = max_size;

        /*
         * If found_ref is set then max_size is the real size and must match the
         * existing refs.  So if we have already found a ref then we need to
         * make sure that this ref matches the existing one, otherwise we need
         * to add a new backref so we can notice that the backrefs don't match
         * and we need to figure out who is telling the truth.  This is to
         * account for that awful fsync bug I introduced where we'd end up with
         * a btrfs_file_extent_item that would have its length include multiple
         * prealloc extents or point inside of a prealloc extent.
         */
        back = find_data_backref(rec, parent, root, owner, offset, found_ref,
                                 bytenr, max_size);
        if (!back) {
                back = alloc_data_backref(rec, parent, root, owner, offset,
                                          max_size);
                BUG_ON(!back);
                /* A new backref still has to be linked into the tree below */
                insert = true;
        }

        if (found_ref) {
                BUG_ON(num_refs != 1);
                if (back->node.found_ref)
                        BUG_ON(back->bytes != max_size);
                back->node.found_ref = 1;
                back->found_ref += 1;
                if (back->bytes != max_size || back->disk_bytenr != bytenr) {
                        back->bytes = max_size;
                        back->disk_bytenr = bytenr;

                        /*
                         * Need to reinsert if not already in the tree.
                         * NOTE(review): presumably because bytes and
                         * disk_bytenr take part in compare_extent_backref()
                         * ordering; confirm against its definition.
                         */
                        if (!insert) {
                                rb_erase(&back->node.node, &rec->backref_tree);
                                insert = true;
                        }
                }
                rec->refs += 1;
                rec->content_checked = 1;
                rec->owner_ref_checked = 1;
        } else {
                /* Warn when the extent tree already reported this backref */
                if (back->node.found_extent_tree) {
                        fprintf(stderr, "Extent back ref already exists "
                                "for %llu parent %llu root %llu "
                                "owner %llu offset %llu num_refs %lu\n",
                                (unsigned long long)bytenr,
                                (unsigned long long)parent,
                                (unsigned long long)root,
                                (unsigned long long)owner,
                                (unsigned long long)offset,
                                (unsigned long)num_refs);
                }
                back->num_refs = num_refs;
                back->node.found_extent_tree = 1;
        }
        if (insert)
                WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
                        compare_extent_backref));

        maybe_free_extent_rec(extent_cache, rec);
        return 0;
}
6556
6557 static int add_pending(struct cache_tree *pending,
6558                        struct cache_tree *seen, u64 bytenr, u32 size)
6559 {
6560         int ret;
6561         ret = add_cache_extent(seen, bytenr, size);
6562         if (ret)
6563                 return ret;
6564         add_cache_extent(pending, bytenr, size);
6565         return 0;
6566 }
6567
6568 static int pick_next_pending(struct cache_tree *pending,
6569                         struct cache_tree *reada,
6570                         struct cache_tree *nodes,
6571                         u64 last, struct block_info *bits, int bits_nr,
6572                         int *reada_bits)
6573 {
6574         unsigned long node_start = last;
6575         struct cache_extent *cache;
6576         int ret;
6577
6578         cache = search_cache_extent(reada, 0);
6579         if (cache) {
6580                 bits[0].start = cache->start;
6581                 bits[0].size = cache->size;
6582                 *reada_bits = 1;
6583                 return 1;
6584         }
6585         *reada_bits = 0;
6586         if (node_start > 32768)
6587                 node_start -= 32768;
6588
6589         cache = search_cache_extent(nodes, node_start);
6590         if (!cache)
6591                 cache = search_cache_extent(nodes, 0);
6592
6593         if (!cache) {
6594                  cache = search_cache_extent(pending, 0);
6595                  if (!cache)
6596                          return 0;
6597                  ret = 0;
6598                  do {
6599                          bits[ret].start = cache->start;
6600                          bits[ret].size = cache->size;
6601                          cache = next_cache_extent(cache);
6602                          ret++;
6603                  } while (cache && ret < bits_nr);
6604                  return ret;
6605         }
6606
6607         ret = 0;
6608         do {
6609                 bits[ret].start = cache->start;
6610                 bits[ret].size = cache->size;
6611                 cache = next_cache_extent(cache);
6612                 ret++;
6613         } while (cache && ret < bits_nr);
6614
6615         if (bits_nr - ret > 8) {
6616                 u64 lookup = bits[0].start + bits[0].size;
6617                 struct cache_extent *next;
6618                 next = search_cache_extent(pending, lookup);
6619                 while(next) {
6620                         if (next->start - lookup > 32768)
6621                                 break;
6622                         bits[ret].start = next->start;
6623                         bits[ret].size = next->size;
6624                         lookup = next->start + next->size;
6625                         ret++;
6626                         if (ret == bits_nr)
6627                                 break;
6628                         next = next_cache_extent(next);
6629                         if (!next)
6630                                 break;
6631                 }
6632         }
6633         return ret;
6634 }
6635
6636 static void free_chunk_record(struct cache_extent *cache)
6637 {
6638         struct chunk_record *rec;
6639
6640         rec = container_of(cache, struct chunk_record, cache);
6641         list_del_init(&rec->list);
6642         list_del_init(&rec->dextents);
6643         free(rec);
6644 }
6645
/*
 * Release the records held in @chunk_cache by passing free_chunk_record()
 * to cache_tree_free_extents().
 */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
        cache_tree_free_extents(chunk_cache, free_chunk_record);
}
6650
6651 static void free_device_record(struct rb_node *node)
6652 {
6653         struct device_record *rec;
6654
6655         rec = container_of(node, struct device_record, node);
6656         free(rec);
6657 }
6658
/*
 * Instantiate the FREE_RB_BASED_TREE helper for device_cache with
 * free_device_record() as the per-node destructor.
 * NOTE(review): presumably this expands to the function that tears down the
 * device rb-tree; confirm against the macro definition.
 */
FREE_RB_BASED_TREE(device_cache, free_device_record);
6660
6661 int insert_block_group_record(struct block_group_tree *tree,
6662                               struct block_group_record *bg_rec)
6663 {
6664         int ret;
6665
6666         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6667         if (ret)
6668                 return ret;
6669
6670         list_add_tail(&bg_rec->list, &tree->block_groups);
6671         return 0;
6672 }
6673
6674 static void free_block_group_record(struct cache_extent *cache)
6675 {
6676         struct block_group_record *rec;
6677
6678         rec = container_of(cache, struct block_group_record, cache);
6679         list_del_init(&rec->list);
6680         free(rec);
6681 }
6682
/*
 * Release the records held in the cache tree of @tree by passing
 * free_block_group_record() to cache_tree_free_extents().
 */
void free_block_group_tree(struct block_group_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_block_group_record);
}
6687
6688 int insert_device_extent_record(struct device_extent_tree *tree,
6689                                 struct device_extent_record *de_rec)
6690 {
6691         int ret;
6692
6693         /*
6694          * Device extent is a bit different from the other extents, because
6695          * the extents which belong to the different devices may have the
6696          * same start and size, so we need use the special extent cache
6697          * search/insert functions.
6698          */
6699         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6700         if (ret)
6701                 return ret;
6702
6703         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6704         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6705         return 0;
6706 }
6707
6708 static void free_device_extent_record(struct cache_extent *cache)
6709 {
6710         struct device_extent_record *rec;
6711
6712         rec = container_of(cache, struct device_extent_record, cache);
6713         if (!list_empty(&rec->chunk_list))
6714                 list_del_init(&rec->chunk_list);
6715         if (!list_empty(&rec->device_list))
6716                 list_del_init(&rec->device_list);
6717         free(rec);
6718 }
6719
/*
 * Release the records held in the cache tree of @tree by passing
 * free_device_extent_record() to cache_tree_free_extents().
 */
void free_device_extent_tree(struct device_extent_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
6724
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item found in @slot of @leaf.
 *
 * The item key supplies the extent bytenr (objectid) and the parent
 * (offset).  A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the item's ref
 * count.  Returns the result of the add_*_backref() call.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
                return add_tree_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0);

        return add_data_backref(extent_cache, key.objectid, key.offset,
                                0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6745
6746 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6747                                             struct btrfs_key *key,
6748                                             int slot)
6749 {
6750         struct btrfs_chunk *ptr;
6751         struct chunk_record *rec;
6752         int num_stripes, i;
6753
6754         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6755         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6756
6757         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6758         if (!rec) {
6759                 fprintf(stderr, "memory allocation failed\n");
6760                 exit(-1);
6761         }
6762
6763         INIT_LIST_HEAD(&rec->list);
6764         INIT_LIST_HEAD(&rec->dextents);
6765         rec->bg_rec = NULL;
6766
6767         rec->cache.start = key->offset;
6768         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6769
6770         rec->generation = btrfs_header_generation(leaf);
6771
6772         rec->objectid = key->objectid;
6773         rec->type = key->type;
6774         rec->offset = key->offset;
6775
6776         rec->length = rec->cache.size;
6777         rec->owner = btrfs_chunk_owner(leaf, ptr);
6778         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6779         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6780         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6781         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6782         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6783         rec->num_stripes = num_stripes;
6784         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6785
6786         for (i = 0; i < rec->num_stripes; ++i) {
6787                 rec->stripes[i].devid =
6788                         btrfs_stripe_devid_nr(leaf, ptr, i);
6789                 rec->stripes[i].offset =
6790                         btrfs_stripe_offset_nr(leaf, ptr, i);
6791                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6792                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6793                                 BTRFS_UUID_SIZE);
6794         }
6795
6796         return rec;
6797 }
6798
6799 static int process_chunk_item(struct cache_tree *chunk_cache,
6800                               struct btrfs_key *key, struct extent_buffer *eb,
6801                               int slot)
6802 {
6803         struct chunk_record *rec;
6804         struct btrfs_chunk *chunk;
6805         int ret = 0;
6806
6807         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6808         /*
6809          * Do extra check for this chunk item,
6810          *
6811          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6812          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6813          * and owner<->key_type check.
6814          */
6815         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6816                                       key->offset);
6817         if (ret < 0) {
6818                 error("chunk(%llu, %llu) is not valid, ignore it",
6819                       key->offset, btrfs_chunk_length(eb, chunk));
6820                 return 0;
6821         }
6822         rec = btrfs_new_chunk_record(eb, key, slot);
6823         ret = insert_cache_extent(chunk_cache, &rec->cache);
6824         if (ret) {
6825                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6826                         rec->offset, rec->length);
6827                 free(rec);
6828         }
6829
6830         return ret;
6831 }
6832
6833 static int process_device_item(struct rb_root *dev_cache,
6834                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6835 {
6836         struct btrfs_dev_item *ptr;
6837         struct device_record *rec;
6838         int ret = 0;
6839
6840         ptr = btrfs_item_ptr(eb,
6841                 slot, struct btrfs_dev_item);
6842
6843         rec = malloc(sizeof(*rec));
6844         if (!rec) {
6845                 fprintf(stderr, "memory allocation failed\n");
6846                 return -ENOMEM;
6847         }
6848
6849         rec->devid = key->offset;
6850         rec->generation = btrfs_header_generation(eb);
6851
6852         rec->objectid = key->objectid;
6853         rec->type = key->type;
6854         rec->offset = key->offset;
6855
6856         rec->devid = btrfs_device_id(eb, ptr);
6857         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6858         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6859
6860         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6861         if (ret) {
6862                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6863                 free(rec);
6864         }
6865
6866         return ret;
6867 }
6868
6869 struct block_group_record *
6870 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6871                              int slot)
6872 {
6873         struct btrfs_block_group_item *ptr;
6874         struct block_group_record *rec;
6875
6876         rec = calloc(1, sizeof(*rec));
6877         if (!rec) {
6878                 fprintf(stderr, "memory allocation failed\n");
6879                 exit(-1);
6880         }
6881
6882         rec->cache.start = key->objectid;
6883         rec->cache.size = key->offset;
6884
6885         rec->generation = btrfs_header_generation(leaf);
6886
6887         rec->objectid = key->objectid;
6888         rec->type = key->type;
6889         rec->offset = key->offset;
6890
6891         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6892         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6893
6894         INIT_LIST_HEAD(&rec->list);
6895
6896         return rec;
6897 }
6898
6899 static int process_block_group_item(struct block_group_tree *block_group_cache,
6900                                     struct btrfs_key *key,
6901                                     struct extent_buffer *eb, int slot)
6902 {
6903         struct block_group_record *rec;
6904         int ret = 0;
6905
6906         rec = btrfs_new_block_group_record(eb, key, slot);
6907         ret = insert_block_group_record(block_group_cache, rec);
6908         if (ret) {
6909                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6910                         rec->objectid, rec->offset);
6911                 free(rec);
6912         }
6913
6914         return ret;
6915 }
6916
6917 struct device_extent_record *
6918 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6919                                struct btrfs_key *key, int slot)
6920 {
6921         struct device_extent_record *rec;
6922         struct btrfs_dev_extent *ptr;
6923
6924         rec = calloc(1, sizeof(*rec));
6925         if (!rec) {
6926                 fprintf(stderr, "memory allocation failed\n");
6927                 exit(-1);
6928         }
6929
6930         rec->cache.objectid = key->objectid;
6931         rec->cache.start = key->offset;
6932
6933         rec->generation = btrfs_header_generation(leaf);
6934
6935         rec->objectid = key->objectid;
6936         rec->type = key->type;
6937         rec->offset = key->offset;
6938
6939         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6940         rec->chunk_objecteid =
6941                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6942         rec->chunk_offset =
6943                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6944         rec->length = btrfs_dev_extent_length(leaf, ptr);
6945         rec->cache.size = rec->length;
6946
6947         INIT_LIST_HEAD(&rec->chunk_list);
6948         INIT_LIST_HEAD(&rec->device_list);
6949
6950         return rec;
6951 }
6952
6953 static int
6954 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6955                            struct btrfs_key *key, struct extent_buffer *eb,
6956                            int slot)
6957 {
6958         struct device_extent_record *rec;
6959         int ret;
6960
6961         rec = btrfs_new_device_extent_record(eb, key, slot);
6962         ret = insert_device_extent_record(dev_extent_cache, rec);
6963         if (ret) {
6964                 fprintf(stderr,
6965                         "Device extent[%llu, %llu, %llu] existed.\n",
6966                         rec->objectid, rec->offset, rec->length);
6967                 free(rec);
6968         }
6969
6970         return ret;
6971 }
6972
6973 static int process_extent_item(struct btrfs_root *root,
6974                                struct cache_tree *extent_cache,
6975                                struct extent_buffer *eb, int slot)
6976 {
6977         struct btrfs_extent_item *ei;
6978         struct btrfs_extent_inline_ref *iref;
6979         struct btrfs_extent_data_ref *dref;
6980         struct btrfs_shared_data_ref *sref;
6981         struct btrfs_key key;
6982         struct extent_record tmpl;
6983         unsigned long end;
6984         unsigned long ptr;
6985         int ret;
6986         int type;
6987         u32 item_size = btrfs_item_size_nr(eb, slot);
6988         u64 refs = 0;
6989         u64 offset;
6990         u64 num_bytes;
6991         int metadata = 0;
6992
6993         btrfs_item_key_to_cpu(eb, &key, slot);
6994
6995         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6996                 metadata = 1;
6997                 num_bytes = root->fs_info->nodesize;
6998         } else {
6999                 num_bytes = key.offset;
7000         }
7001
7002         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7003                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7004                       key.objectid, root->fs_info->sectorsize);
7005                 return -EIO;
7006         }
7007         if (item_size < sizeof(*ei)) {
7008 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7009                 struct btrfs_extent_item_v0 *ei0;
7010                 BUG_ON(item_size != sizeof(*ei0));
7011                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7012                 refs = btrfs_extent_refs_v0(eb, ei0);
7013 #else
7014                 BUG();
7015 #endif
7016                 memset(&tmpl, 0, sizeof(tmpl));
7017                 tmpl.start = key.objectid;
7018                 tmpl.nr = num_bytes;
7019                 tmpl.extent_item_refs = refs;
7020                 tmpl.metadata = metadata;
7021                 tmpl.found_rec = 1;
7022                 tmpl.max_size = num_bytes;
7023
7024                 return add_extent_rec(extent_cache, &tmpl);
7025         }
7026
7027         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7028         refs = btrfs_extent_refs(eb, ei);
7029         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7030                 metadata = 1;
7031         else
7032                 metadata = 0;
7033         if (metadata && num_bytes != root->fs_info->nodesize) {
7034                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7035                       num_bytes, root->fs_info->nodesize);
7036                 return -EIO;
7037         }
7038         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7039                 error("ignore invalid data extent, length %llu is not aligned to %u",
7040                       num_bytes, root->fs_info->sectorsize);
7041                 return -EIO;
7042         }
7043
7044         memset(&tmpl, 0, sizeof(tmpl));
7045         tmpl.start = key.objectid;
7046         tmpl.nr = num_bytes;
7047         tmpl.extent_item_refs = refs;
7048         tmpl.metadata = metadata;
7049         tmpl.found_rec = 1;
7050         tmpl.max_size = num_bytes;
7051         add_extent_rec(extent_cache, &tmpl);
7052
7053         ptr = (unsigned long)(ei + 1);
7054         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7055             key.type == BTRFS_EXTENT_ITEM_KEY)
7056                 ptr += sizeof(struct btrfs_tree_block_info);
7057
7058         end = (unsigned long)ei + item_size;
7059         while (ptr < end) {
7060                 iref = (struct btrfs_extent_inline_ref *)ptr;
7061                 type = btrfs_extent_inline_ref_type(eb, iref);
7062                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7063                 switch (type) {
7064                 case BTRFS_TREE_BLOCK_REF_KEY:
7065                         ret = add_tree_backref(extent_cache, key.objectid,
7066                                         0, offset, 0);
7067                         if (ret < 0)
7068                                 error(
7069                         "add_tree_backref failed (extent items tree block): %s",
7070                                       strerror(-ret));
7071                         break;
7072                 case BTRFS_SHARED_BLOCK_REF_KEY:
7073                         ret = add_tree_backref(extent_cache, key.objectid,
7074                                         offset, 0, 0);
7075                         if (ret < 0)
7076                                 error(
7077                         "add_tree_backref failed (extent items shared block): %s",
7078                                       strerror(-ret));
7079                         break;
7080                 case BTRFS_EXTENT_DATA_REF_KEY:
7081                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7082                         add_data_backref(extent_cache, key.objectid, 0,
7083                                         btrfs_extent_data_ref_root(eb, dref),
7084                                         btrfs_extent_data_ref_objectid(eb,
7085                                                                        dref),
7086                                         btrfs_extent_data_ref_offset(eb, dref),
7087                                         btrfs_extent_data_ref_count(eb, dref),
7088                                         0, num_bytes);
7089                         break;
7090                 case BTRFS_SHARED_DATA_REF_KEY:
7091                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7092                         add_data_backref(extent_cache, key.objectid, offset,
7093                                         0, 0, 0,
7094                                         btrfs_shared_data_ref_count(eb, sref),
7095                                         0, num_bytes);
7096                         break;
7097                 default:
7098                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7099                                 key.objectid, key.type, num_bytes);
7100                         goto out;
7101                 }
7102                 ptr += btrfs_extent_inline_ref_size(type);
7103         }
7104         WARN_ON(ptr > end);
7105 out:
7106         return 0;
7107 }
7108
7109 static int check_cache_range(struct btrfs_root *root,
7110                              struct btrfs_block_group_cache *cache,
7111                              u64 offset, u64 bytes)
7112 {
7113         struct btrfs_free_space *entry;
7114         u64 *logical;
7115         u64 bytenr;
7116         int stripe_len;
7117         int i, nr, ret;
7118
7119         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7120                 bytenr = btrfs_sb_offset(i);
7121                 ret = btrfs_rmap_block(root->fs_info,
7122                                        cache->key.objectid, bytenr, 0,
7123                                        &logical, &nr, &stripe_len);
7124                 if (ret)
7125                         return ret;
7126
7127                 while (nr--) {
7128                         if (logical[nr] + stripe_len <= offset)
7129                                 continue;
7130                         if (offset + bytes <= logical[nr])
7131                                 continue;
7132                         if (logical[nr] == offset) {
7133                                 if (stripe_len >= bytes) {
7134                                         free(logical);
7135                                         return 0;
7136                                 }
7137                                 bytes -= stripe_len;
7138                                 offset += stripe_len;
7139                         } else if (logical[nr] < offset) {
7140                                 if (logical[nr] + stripe_len >=
7141                                     offset + bytes) {
7142                                         free(logical);
7143                                         return 0;
7144                                 }
7145                                 bytes = (offset + bytes) -
7146                                         (logical[nr] + stripe_len);
7147                                 offset = logical[nr] + stripe_len;
7148                         } else {
7149                                 /*
7150                                  * Could be tricky, the super may land in the
7151                                  * middle of the area we're checking.  First
7152                                  * check the easiest case, it's at the end.
7153                                  */
7154                                 if (logical[nr] + stripe_len >=
7155                                     bytes + offset) {
7156                                         bytes = logical[nr] - offset;
7157                                         continue;
7158                                 }
7159
7160                                 /* Check the left side */
7161                                 ret = check_cache_range(root, cache,
7162                                                         offset,
7163                                                         logical[nr] - offset);
7164                                 if (ret) {
7165                                         free(logical);
7166                                         return ret;
7167                                 }
7168
7169                                 /* Now we continue with the right side */
7170                                 bytes = (offset + bytes) -
7171                                         (logical[nr] + stripe_len);
7172                                 offset = logical[nr] + stripe_len;
7173                         }
7174                 }
7175
7176                 free(logical);
7177         }
7178
7179         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7180         if (!entry) {
7181                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7182                         offset, offset+bytes);
7183                 return -EINVAL;
7184         }
7185
7186         if (entry->offset != offset) {
7187                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7188                         entry->offset);
7189                 return -EINVAL;
7190         }
7191
7192         if (entry->bytes != bytes) {
7193                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7194                         bytes, entry->bytes, offset);
7195                 return -EINVAL;
7196         }
7197
7198         unlink_free_space(cache->free_space_ctl, entry);
7199         free(entry);
7200         return 0;
7201 }
7202
7203 static int verify_space_cache(struct btrfs_root *root,
7204                               struct btrfs_block_group_cache *cache)
7205 {
7206         struct btrfs_path path;
7207         struct extent_buffer *leaf;
7208         struct btrfs_key key;
7209         u64 last;
7210         int ret = 0;
7211
7212         root = root->fs_info->extent_root;
7213
7214         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7215
7216         btrfs_init_path(&path);
7217         key.objectid = last;
7218         key.offset = 0;
7219         key.type = BTRFS_EXTENT_ITEM_KEY;
7220         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7221         if (ret < 0)
7222                 goto out;
7223         ret = 0;
7224         while (1) {
7225                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7226                         ret = btrfs_next_leaf(root, &path);
7227                         if (ret < 0)
7228                                 goto out;
7229                         if (ret > 0) {
7230                                 ret = 0;
7231                                 break;
7232                         }
7233                 }
7234                 leaf = path.nodes[0];
7235                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7236                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7237                         break;
7238                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7239                     key.type != BTRFS_METADATA_ITEM_KEY) {
7240                         path.slots[0]++;
7241                         continue;
7242                 }
7243
7244                 if (last == key.objectid) {
7245                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7246                                 last = key.objectid + key.offset;
7247                         else
7248                                 last = key.objectid + root->fs_info->nodesize;
7249                         path.slots[0]++;
7250                         continue;
7251                 }
7252
7253                 ret = check_cache_range(root, cache, last,
7254                                         key.objectid - last);
7255                 if (ret)
7256                         break;
7257                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7258                         last = key.objectid + key.offset;
7259                 else
7260                         last = key.objectid + root->fs_info->nodesize;
7261                 path.slots[0]++;
7262         }
7263
7264         if (last < cache->key.objectid + cache->key.offset)
7265                 ret = check_cache_range(root, cache, last,
7266                                         cache->key.objectid +
7267                                         cache->key.offset - last);
7268
7269 out:
7270         btrfs_release_path(&path);
7271
7272         if (!ret &&
7273             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7274                 fprintf(stderr, "There are still entries left in the space "
7275                         "cache\n");
7276                 ret = -EINVAL;
7277         }
7278
7279         return ret;
7280 }
7281
7282 static int check_space_cache(struct btrfs_root *root)
7283 {
7284         struct btrfs_block_group_cache *cache;
7285         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7286         int ret;
7287         int error = 0;
7288
7289         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7290             btrfs_super_generation(root->fs_info->super_copy) !=
7291             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7292                 printf("cache and super generation don't match, space cache "
7293                        "will be invalidated\n");
7294                 return 0;
7295         }
7296
7297         if (ctx.progress_enabled) {
7298                 ctx.tp = TASK_FREE_SPACE;
7299                 task_start(ctx.info);
7300         }
7301
7302         while (1) {
7303                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7304                 if (!cache)
7305                         break;
7306
7307                 start = cache->key.objectid + cache->key.offset;
7308                 if (!cache->free_space_ctl) {
7309                         if (btrfs_init_free_space_ctl(cache,
7310                                                 root->fs_info->sectorsize)) {
7311                                 ret = -ENOMEM;
7312                                 break;
7313                         }
7314                 } else {
7315                         btrfs_remove_free_space_cache(cache);
7316                 }
7317
7318                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7319                         ret = exclude_super_stripes(root, cache);
7320                         if (ret) {
7321                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7322                                         strerror(-ret));
7323                                 error++;
7324                                 continue;
7325                         }
7326                         ret = load_free_space_tree(root->fs_info, cache);
7327                         free_excluded_extents(root, cache);
7328                         if (ret < 0) {
7329                                 fprintf(stderr, "could not load free space tree: %s\n",
7330                                         strerror(-ret));
7331                                 error++;
7332                                 continue;
7333                         }
7334                         error += ret;
7335                 } else {
7336                         ret = load_free_space_cache(root->fs_info, cache);
7337                         if (!ret)
7338                                 continue;
7339                 }
7340
7341                 ret = verify_space_cache(root, cache);
7342                 if (ret) {
7343                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7344                                 cache->key.objectid);
7345                         error++;
7346                 }
7347         }
7348
7349         task_stop(ctx.info);
7350
7351         return error ? -EINVAL : 0;
7352 }
7353
7354 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7355                         u64 num_bytes, unsigned long leaf_offset,
7356                         struct extent_buffer *eb) {
7357
7358         struct btrfs_fs_info *fs_info = root->fs_info;
7359         u64 offset = 0;
7360         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7361         char *data;
7362         unsigned long csum_offset;
7363         u32 csum;
7364         u32 csum_expected;
7365         u64 read_len;
7366         u64 data_checked = 0;
7367         u64 tmp;
7368         int ret = 0;
7369         int mirror;
7370         int num_copies;
7371
7372         if (num_bytes % fs_info->sectorsize)
7373                 return -EINVAL;
7374
7375         data = malloc(num_bytes);
7376         if (!data)
7377                 return -ENOMEM;
7378
7379         while (offset < num_bytes) {
7380                 mirror = 0;
7381 again:
7382                 read_len = num_bytes - offset;
7383                 /* read as much space once a time */
7384                 ret = read_extent_data(fs_info, data + offset,
7385                                 bytenr + offset, &read_len, mirror);
7386                 if (ret)
7387                         goto out;
7388                 data_checked = 0;
7389                 /* verify every 4k data's checksum */
7390                 while (data_checked < read_len) {
7391                         csum = ~(u32)0;
7392                         tmp = offset + data_checked;
7393
7394                         csum = btrfs_csum_data((char *)data + tmp,
7395                                                csum, fs_info->sectorsize);
7396                         btrfs_csum_final(csum, (u8 *)&csum);
7397
7398                         csum_offset = leaf_offset +
7399                                  tmp / fs_info->sectorsize * csum_size;
7400                         read_extent_buffer(eb, (char *)&csum_expected,
7401                                            csum_offset, csum_size);
7402                         /* try another mirror */
7403                         if (csum != csum_expected) {
7404                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7405                                                 mirror, bytenr + tmp,
7406                                                 csum, csum_expected);
7407                                 num_copies = btrfs_num_copies(root->fs_info,
7408                                                 bytenr, num_bytes);
7409                                 if (mirror < num_copies - 1) {
7410                                         mirror += 1;
7411                                         goto again;
7412                                 }
7413                         }
7414                         data_checked += fs_info->sectorsize;
7415                 }
7416                 offset += read_len;
7417         }
7418 out:
7419         free(data);
7420         return ret;
7421 }
7422
7423 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7424                                u64 num_bytes)
7425 {
7426         struct btrfs_path path;
7427         struct extent_buffer *leaf;
7428         struct btrfs_key key;
7429         int ret;
7430
7431         btrfs_init_path(&path);
7432         key.objectid = bytenr;
7433         key.type = BTRFS_EXTENT_ITEM_KEY;
7434         key.offset = (u64)-1;
7435
7436 again:
7437         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7438                                 0, 0);
7439         if (ret < 0) {
7440                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7441                 btrfs_release_path(&path);
7442                 return ret;
7443         } else if (ret) {
7444                 if (path.slots[0] > 0) {
7445                         path.slots[0]--;
7446                 } else {
7447                         ret = btrfs_prev_leaf(root, &path);
7448                         if (ret < 0) {
7449                                 goto out;
7450                         } else if (ret > 0) {
7451                                 ret = 0;
7452                                 goto out;
7453                         }
7454                 }
7455         }
7456
7457         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7458
7459         /*
7460          * Block group items come before extent items if they have the same
7461          * bytenr, so walk back one more just in case.  Dear future traveller,
7462          * first congrats on mastering time travel.  Now if it's not too much
7463          * trouble could you go back to 2006 and tell Chris to make the
7464          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7465          * EXTENT_ITEM_KEY please?
7466          */
7467         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7468                 if (path.slots[0] > 0) {
7469                         path.slots[0]--;
7470                 } else {
7471                         ret = btrfs_prev_leaf(root, &path);
7472                         if (ret < 0) {
7473                                 goto out;
7474                         } else if (ret > 0) {
7475                                 ret = 0;
7476                                 goto out;
7477                         }
7478                 }
7479                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7480         }
7481
7482         while (num_bytes) {
7483                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7484                         ret = btrfs_next_leaf(root, &path);
7485                         if (ret < 0) {
7486                                 fprintf(stderr, "Error going to next leaf "
7487                                         "%d\n", ret);
7488                                 btrfs_release_path(&path);
7489                                 return ret;
7490                         } else if (ret) {
7491                                 break;
7492                         }
7493                 }
7494                 leaf = path.nodes[0];
7495                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7496                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7497                         path.slots[0]++;
7498                         continue;
7499                 }
7500                 if (key.objectid + key.offset < bytenr) {
7501                         path.slots[0]++;
7502                         continue;
7503                 }
7504                 if (key.objectid > bytenr + num_bytes)
7505                         break;
7506
7507                 if (key.objectid == bytenr) {
7508                         if (key.offset >= num_bytes) {
7509                                 num_bytes = 0;
7510                                 break;
7511                         }
7512                         num_bytes -= key.offset;
7513                         bytenr += key.offset;
7514                 } else if (key.objectid < bytenr) {
7515                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7516                                 num_bytes = 0;
7517                                 break;
7518                         }
7519                         num_bytes = (bytenr + num_bytes) -
7520                                 (key.objectid + key.offset);
7521                         bytenr = key.objectid + key.offset;
7522                 } else {
7523                         if (key.objectid + key.offset < bytenr + num_bytes) {
7524                                 u64 new_start = key.objectid + key.offset;
7525                                 u64 new_bytes = bytenr + num_bytes - new_start;
7526
7527                                 /*
7528                                  * Weird case, the extent is in the middle of
7529                                  * our range, we'll have to search one side
7530                                  * and then the other.  Not sure if this happens
7531                                  * in real life, but no harm in coding it up
7532                                  * anyway just in case.
7533                                  */
7534                                 btrfs_release_path(&path);
7535                                 ret = check_extent_exists(root, new_start,
7536                                                           new_bytes);
7537                                 if (ret) {
7538                                         fprintf(stderr, "Right section didn't "
7539                                                 "have a record\n");
7540                                         break;
7541                                 }
7542                                 num_bytes = key.objectid - bytenr;
7543                                 goto again;
7544                         }
7545                         num_bytes = key.objectid - bytenr;
7546                 }
7547                 path.slots[0]++;
7548         }
7549         ret = 0;
7550
7551 out:
7552         if (num_bytes && !ret) {
7553                 fprintf(stderr, "There are no extents for csum range "
7554                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7555                 ret = 1;
7556         }
7557
7558         btrfs_release_path(&path);
7559         return ret;
7560 }
7561
7562 static int check_csums(struct btrfs_root *root)
7563 {
7564         struct btrfs_path path;
7565         struct extent_buffer *leaf;
7566         struct btrfs_key key;
7567         u64 offset = 0, num_bytes = 0;
7568         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7569         int errors = 0;
7570         int ret;
7571         u64 data_len;
7572         unsigned long leaf_offset;
7573
7574         root = root->fs_info->csum_root;
7575         if (!extent_buffer_uptodate(root->node)) {
7576                 fprintf(stderr, "No valid csum tree found\n");
7577                 return -ENOENT;
7578         }
7579
7580         btrfs_init_path(&path);
7581         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7582         key.type = BTRFS_EXTENT_CSUM_KEY;
7583         key.offset = 0;
7584         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7585         if (ret < 0) {
7586                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7587                 btrfs_release_path(&path);
7588                 return ret;
7589         }
7590
7591         if (ret > 0 && path.slots[0])
7592                 path.slots[0]--;
7593         ret = 0;
7594
7595         while (1) {
7596                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7597                         ret = btrfs_next_leaf(root, &path);
7598                         if (ret < 0) {
7599                                 fprintf(stderr, "Error going to next leaf "
7600                                         "%d\n", ret);
7601                                 break;
7602                         }
7603                         if (ret)
7604                                 break;
7605                 }
7606                 leaf = path.nodes[0];
7607
7608                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7609                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7610                         path.slots[0]++;
7611                         continue;
7612                 }
7613
7614                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7615                               csum_size) * root->fs_info->sectorsize;
7616                 if (!check_data_csum)
7617                         goto skip_csum_check;
7618                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7619                 ret = check_extent_csums(root, key.offset, data_len,
7620                                          leaf_offset, leaf);
7621                 if (ret)
7622                         break;
7623 skip_csum_check:
7624                 if (!num_bytes) {
7625                         offset = key.offset;
7626                 } else if (key.offset != offset + num_bytes) {
7627                         ret = check_extent_exists(root, offset, num_bytes);
7628                         if (ret) {
7629                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7630                                         "there is no extent record\n",
7631                                         offset, offset+num_bytes);
7632                                 errors++;
7633                         }
7634                         offset = key.offset;
7635                         num_bytes = 0;
7636                 }
7637                 num_bytes += data_len;
7638                 path.slots[0]++;
7639         }
7640
7641         btrfs_release_path(&path);
7642         return errors;
7643 }
7644
7645 static int is_dropped_key(struct btrfs_key *key,
7646                           struct btrfs_key *drop_key) {
7647         if (key->objectid < drop_key->objectid)
7648                 return 1;
7649         else if (key->objectid == drop_key->objectid) {
7650                 if (key->type < drop_key->type)
7651                         return 1;
7652                 else if (key->type == drop_key->type) {
7653                         if (key->offset < drop_key->offset)
7654                                 return 1;
7655                 }
7656         }
7657         return 0;
7658 }
7659
7660 /*
7661  * Here are the rules for FULL_BACKREF.
7662  *
7663  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7664  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7665  *      FULL_BACKREF set.
7666  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7667  *    if it happened after the relocation occurred since we'll have dropped the
7668  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7669  *    have no real way to know for sure.
7670  *
7671  * We process the blocks one root at a time, and we start from the lowest root
7672  * objectid and go to the highest.  So we can just lookup the owner backref for
7673  * the record and if we don't find it then we know it doesn't exist and we have
7674  * a FULL BACKREF.
7675  *
7676  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7677  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7678  * be set or not and then we can check later once we've gathered all the refs.
7679  */
7680 static int calc_extent_flag(struct cache_tree *extent_cache,
7681                            struct extent_buffer *buf,
7682                            struct root_item_record *ri,
7683                            u64 *flags)
7684 {
7685         struct extent_record *rec;
7686         struct cache_extent *cache;
7687         struct tree_backref *tback;
7688         u64 owner = 0;
7689
7690         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7691         /* we have added this extent before */
7692         if (!cache)
7693                 return -ENOENT;
7694
7695         rec = container_of(cache, struct extent_record, cache);
7696
7697         /*
7698          * Except file/reloc tree, we can not have
7699          * FULL BACKREF MODE
7700          */
7701         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7702                 goto normal;
7703         /*
7704          * root node
7705          */
7706         if (buf->start == ri->bytenr)
7707                 goto normal;
7708
7709         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7710                 goto full_backref;
7711
7712         owner = btrfs_header_owner(buf);
7713         if (owner == ri->objectid)
7714                 goto normal;
7715
7716         tback = find_tree_backref(rec, 0, owner);
7717         if (!tback)
7718                 goto full_backref;
7719 normal:
7720         *flags = 0;
7721         if (rec->flag_block_full_backref != FLAG_UNSET &&
7722             rec->flag_block_full_backref != 0)
7723                 rec->bad_full_backref = 1;
7724         return 0;
7725 full_backref:
7726         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7727         if (rec->flag_block_full_backref != FLAG_UNSET &&
7728             rec->flag_block_full_backref != 1)
7729                 rec->bad_full_backref = 1;
7730         return 0;
7731 }
7732
7733 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7734 {
7735         fprintf(stderr, "Invalid key type(");
7736         print_key_type(stderr, 0, key_type);
7737         fprintf(stderr, ") found in root(");
7738         print_objectid(stderr, rootid, 0);
7739         fprintf(stderr, ")\n");
7740 }
7741
7742 /*
7743  * Check if the key is valid with its extent buffer.
7744  *
7745  * This is a early check in case invalid key exists in a extent buffer
7746  * This is not comprehensive yet, but should prevent wrong key/item passed
7747  * further
7748  */
7749 static int check_type_with_root(u64 rootid, u8 key_type)
7750 {
7751         switch (key_type) {
7752         /* Only valid in chunk tree */
7753         case BTRFS_DEV_ITEM_KEY:
7754         case BTRFS_CHUNK_ITEM_KEY:
7755                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7756                         goto err;
7757                 break;
7758         /* valid in csum and log tree */
7759         case BTRFS_CSUM_TREE_OBJECTID:
7760                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7761                       is_fstree(rootid)))
7762                         goto err;
7763                 break;
7764         case BTRFS_EXTENT_ITEM_KEY:
7765         case BTRFS_METADATA_ITEM_KEY:
7766         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7767                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7768                         goto err;
7769                 break;
7770         case BTRFS_ROOT_ITEM_KEY:
7771                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7772                         goto err;
7773                 break;
7774         case BTRFS_DEV_EXTENT_KEY:
7775                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7776                         goto err;
7777                 break;
7778         }
7779         return 0;
7780 err:
7781         report_mismatch_key_root(key_type, rootid);
7782         return -EINVAL;
7783 }
7784
7785 static int run_next_block(struct btrfs_root *root,
7786                           struct block_info *bits,
7787                           int bits_nr,
7788                           u64 *last,
7789                           struct cache_tree *pending,
7790                           struct cache_tree *seen,
7791                           struct cache_tree *reada,
7792                           struct cache_tree *nodes,
7793                           struct cache_tree *extent_cache,
7794                           struct cache_tree *chunk_cache,
7795                           struct rb_root *dev_cache,
7796                           struct block_group_tree *block_group_cache,
7797                           struct device_extent_tree *dev_extent_cache,
7798                           struct root_item_record *ri)
7799 {
7800         struct btrfs_fs_info *fs_info = root->fs_info;
7801         struct extent_buffer *buf;
7802         struct extent_record *rec = NULL;
7803         u64 bytenr;
7804         u32 size;
7805         u64 parent;
7806         u64 owner;
7807         u64 flags;
7808         u64 ptr;
7809         u64 gen = 0;
7810         int ret = 0;
7811         int i;
7812         int nritems;
7813         struct btrfs_key key;
7814         struct cache_extent *cache;
7815         int reada_bits;
7816
7817         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7818                                     bits_nr, &reada_bits);
7819         if (nritems == 0)
7820                 return 1;
7821
7822         if (!reada_bits) {
7823                 for(i = 0; i < nritems; i++) {
7824                         ret = add_cache_extent(reada, bits[i].start,
7825                                                bits[i].size);
7826                         if (ret == -EEXIST)
7827                                 continue;
7828
7829                         /* fixme, get the parent transid */
7830                         readahead_tree_block(fs_info, bits[i].start, 0);
7831                 }
7832         }
7833         *last = bits[0].start;
7834         bytenr = bits[0].start;
7835         size = bits[0].size;
7836
7837         cache = lookup_cache_extent(pending, bytenr, size);
7838         if (cache) {
7839                 remove_cache_extent(pending, cache);
7840                 free(cache);
7841         }
7842         cache = lookup_cache_extent(reada, bytenr, size);
7843         if (cache) {
7844                 remove_cache_extent(reada, cache);
7845                 free(cache);
7846         }
7847         cache = lookup_cache_extent(nodes, bytenr, size);
7848         if (cache) {
7849                 remove_cache_extent(nodes, cache);
7850                 free(cache);
7851         }
7852         cache = lookup_cache_extent(extent_cache, bytenr, size);
7853         if (cache) {
7854                 rec = container_of(cache, struct extent_record, cache);
7855                 gen = rec->parent_generation;
7856         }
7857
7858         /* fixme, get the real parent transid */
7859         buf = read_tree_block(root->fs_info, bytenr, gen);
7860         if (!extent_buffer_uptodate(buf)) {
7861                 record_bad_block_io(root->fs_info,
7862                                     extent_cache, bytenr, size);
7863                 goto out;
7864         }
7865
7866         nritems = btrfs_header_nritems(buf);
7867
7868         flags = 0;
7869         if (!init_extent_tree) {
7870                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7871                                        btrfs_header_level(buf), 1, NULL,
7872                                        &flags);
7873                 if (ret < 0) {
7874                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7875                         if (ret < 0) {
7876                                 fprintf(stderr, "Couldn't calc extent flags\n");
7877                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7878                         }
7879                 }
7880         } else {
7881                 flags = 0;
7882                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7883                 if (ret < 0) {
7884                         fprintf(stderr, "Couldn't calc extent flags\n");
7885                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7886                 }
7887         }
7888
7889         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7890                 if (ri != NULL &&
7891                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7892                     ri->objectid == btrfs_header_owner(buf)) {
7893                         /*
7894                          * Ok we got to this block from it's original owner and
7895                          * we have FULL_BACKREF set.  Relocation can leave
7896                          * converted blocks over so this is altogether possible,
7897                          * however it's not possible if the generation > the
7898                          * last snapshot, so check for this case.
7899                          */
7900                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7901                             btrfs_header_generation(buf) > ri->last_snapshot) {
7902                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7903                                 rec->bad_full_backref = 1;
7904                         }
7905                 }
7906         } else {
7907                 if (ri != NULL &&
7908                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7909                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7910                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7911                         rec->bad_full_backref = 1;
7912                 }
7913         }
7914
7915         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7916                 rec->flag_block_full_backref = 1;
7917                 parent = bytenr;
7918                 owner = 0;
7919         } else {
7920                 rec->flag_block_full_backref = 0;
7921                 parent = 0;
7922                 owner = btrfs_header_owner(buf);
7923         }
7924
7925         ret = check_block(root, extent_cache, buf, flags);
7926         if (ret)
7927                 goto out;
7928
7929         if (btrfs_is_leaf(buf)) {
7930                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7931                 for (i = 0; i < nritems; i++) {
7932                         struct btrfs_file_extent_item *fi;
7933                         btrfs_item_key_to_cpu(buf, &key, i);
7934                         /*
7935                          * Check key type against the leaf owner.
7936                          * Could filter quite a lot of early error if
7937                          * owner is correct
7938                          */
7939                         if (check_type_with_root(btrfs_header_owner(buf),
7940                                                  key.type)) {
7941                                 fprintf(stderr, "ignoring invalid key\n");
7942                                 continue;
7943                         }
7944                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7945                                 process_extent_item(root, extent_cache, buf,
7946                                                     i);
7947                                 continue;
7948                         }
7949                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7950                                 process_extent_item(root, extent_cache, buf,
7951                                                     i);
7952                                 continue;
7953                         }
7954                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7955                                 total_csum_bytes +=
7956                                         btrfs_item_size_nr(buf, i);
7957                                 continue;
7958                         }
7959                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7960                                 process_chunk_item(chunk_cache, &key, buf, i);
7961                                 continue;
7962                         }
7963                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7964                                 process_device_item(dev_cache, &key, buf, i);
7965                                 continue;
7966                         }
7967                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7968                                 process_block_group_item(block_group_cache,
7969                                         &key, buf, i);
7970                                 continue;
7971                         }
7972                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7973                                 process_device_extent_item(dev_extent_cache,
7974                                         &key, buf, i);
7975                                 continue;
7976
7977                         }
7978                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7979 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7980                                 process_extent_ref_v0(extent_cache, buf, i);
7981 #else
7982                                 BUG();
7983 #endif
7984                                 continue;
7985                         }
7986
7987                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7988                                 ret = add_tree_backref(extent_cache,
7989                                                 key.objectid, 0, key.offset, 0);
7990                                 if (ret < 0)
7991                                         error(
7992                                 "add_tree_backref failed (leaf tree block): %s",
7993                                               strerror(-ret));
7994                                 continue;
7995                         }
7996                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7997                                 ret = add_tree_backref(extent_cache,
7998                                                 key.objectid, key.offset, 0, 0);
7999                                 if (ret < 0)
8000                                         error(
8001                                 "add_tree_backref failed (leaf shared block): %s",
8002                                               strerror(-ret));
8003                                 continue;
8004                         }
8005                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8006                                 struct btrfs_extent_data_ref *ref;
8007                                 ref = btrfs_item_ptr(buf, i,
8008                                                 struct btrfs_extent_data_ref);
8009                                 add_data_backref(extent_cache,
8010                                         key.objectid, 0,
8011                                         btrfs_extent_data_ref_root(buf, ref),
8012                                         btrfs_extent_data_ref_objectid(buf,
8013                                                                        ref),
8014                                         btrfs_extent_data_ref_offset(buf, ref),
8015                                         btrfs_extent_data_ref_count(buf, ref),
8016                                         0, root->fs_info->sectorsize);
8017                                 continue;
8018                         }
8019                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8020                                 struct btrfs_shared_data_ref *ref;
8021                                 ref = btrfs_item_ptr(buf, i,
8022                                                 struct btrfs_shared_data_ref);
8023                                 add_data_backref(extent_cache,
8024                                         key.objectid, key.offset, 0, 0, 0,
8025                                         btrfs_shared_data_ref_count(buf, ref),
8026                                         0, root->fs_info->sectorsize);
8027                                 continue;
8028                         }
8029                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8030                                 struct bad_item *bad;
8031
8032                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8033                                         continue;
8034                                 if (!owner)
8035                                         continue;
8036                                 bad = malloc(sizeof(struct bad_item));
8037                                 if (!bad)
8038                                         continue;
8039                                 INIT_LIST_HEAD(&bad->list);
8040                                 memcpy(&bad->key, &key,
8041                                        sizeof(struct btrfs_key));
8042                                 bad->root_id = owner;
8043                                 list_add_tail(&bad->list, &delete_items);
8044                                 continue;
8045                         }
8046                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8047                                 continue;
8048                         fi = btrfs_item_ptr(buf, i,
8049                                             struct btrfs_file_extent_item);
8050                         if (btrfs_file_extent_type(buf, fi) ==
8051                             BTRFS_FILE_EXTENT_INLINE)
8052                                 continue;
8053                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8054                                 continue;
8055
8056                         data_bytes_allocated +=
8057                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8058                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8059                                 abort();
8060                         }
8061                         data_bytes_referenced +=
8062                                 btrfs_file_extent_num_bytes(buf, fi);
8063                         add_data_backref(extent_cache,
8064                                 btrfs_file_extent_disk_bytenr(buf, fi),
8065                                 parent, owner, key.objectid, key.offset -
8066                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8067                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8068                 }
8069         } else {
8070                 int level;
8071                 struct btrfs_key first_key;
8072
8073                 first_key.objectid = 0;
8074
8075                 if (nritems > 0)
8076                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8077                 level = btrfs_header_level(buf);
8078                 for (i = 0; i < nritems; i++) {
8079                         struct extent_record tmpl;
8080
8081                         ptr = btrfs_node_blockptr(buf, i);
8082                         size = root->fs_info->nodesize;
8083                         btrfs_node_key_to_cpu(buf, &key, i);
8084                         if (ri != NULL) {
8085                                 if ((level == ri->drop_level)
8086                                     && is_dropped_key(&key, &ri->drop_key)) {
8087                                         continue;
8088                                 }
8089                         }
8090
8091                         memset(&tmpl, 0, sizeof(tmpl));
8092                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8093                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8094                         tmpl.start = ptr;
8095                         tmpl.nr = size;
8096                         tmpl.refs = 1;
8097                         tmpl.metadata = 1;
8098                         tmpl.max_size = size;
8099                         ret = add_extent_rec(extent_cache, &tmpl);
8100                         if (ret < 0)
8101                                 goto out;
8102
8103                         ret = add_tree_backref(extent_cache, ptr, parent,
8104                                         owner, 1);
8105                         if (ret < 0) {
8106                                 error(
8107                                 "add_tree_backref failed (non-leaf block): %s",
8108                                       strerror(-ret));
8109                                 continue;
8110                         }
8111
8112                         if (level > 1) {
8113                                 add_pending(nodes, seen, ptr, size);
8114                         } else {
8115                                 add_pending(pending, seen, ptr, size);
8116                         }
8117                 }
8118                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8119                                       nritems) * sizeof(struct btrfs_key_ptr);
8120         }
8121         total_btree_bytes += buf->len;
8122         if (fs_root_objectid(btrfs_header_owner(buf)))
8123                 total_fs_tree_bytes += buf->len;
8124         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8125                 total_extent_tree_bytes += buf->len;
8126 out:
8127         free_extent_buffer(buf);
8128         return ret;
8129 }
8130
8131 static int add_root_to_pending(struct extent_buffer *buf,
8132                                struct cache_tree *extent_cache,
8133                                struct cache_tree *pending,
8134                                struct cache_tree *seen,
8135                                struct cache_tree *nodes,
8136                                u64 objectid)
8137 {
8138         struct extent_record tmpl;
8139         int ret;
8140
8141         if (btrfs_header_level(buf) > 0)
8142                 add_pending(nodes, seen, buf->start, buf->len);
8143         else
8144                 add_pending(pending, seen, buf->start, buf->len);
8145
8146         memset(&tmpl, 0, sizeof(tmpl));
8147         tmpl.start = buf->start;
8148         tmpl.nr = buf->len;
8149         tmpl.is_root = 1;
8150         tmpl.refs = 1;
8151         tmpl.metadata = 1;
8152         tmpl.max_size = buf->len;
8153         add_extent_rec(extent_cache, &tmpl);
8154
8155         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8156             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8157                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8158                                 0, 1);
8159         else
8160                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8161                                 1);
8162         return ret;
8163 }
8164
8165 /* as we fix the tree, we might be deleting blocks that
8166  * we're tracking for repair.  This hook makes sure we
8167  * remove any backrefs for blocks as we are fixing them.
8168  */
8169 static int free_extent_hook(struct btrfs_trans_handle *trans,
8170                             struct btrfs_root *root,
8171                             u64 bytenr, u64 num_bytes, u64 parent,
8172                             u64 root_objectid, u64 owner, u64 offset,
8173                             int refs_to_drop)
8174 {
8175         struct extent_record *rec;
8176         struct cache_extent *cache;
8177         int is_data;
8178         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8179
8180         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8181         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8182         if (!cache)
8183                 return 0;
8184
8185         rec = container_of(cache, struct extent_record, cache);
8186         if (is_data) {
8187                 struct data_backref *back;
8188                 back = find_data_backref(rec, parent, root_objectid, owner,
8189                                          offset, 1, bytenr, num_bytes);
8190                 if (!back)
8191                         goto out;
8192                 if (back->node.found_ref) {
8193                         back->found_ref -= refs_to_drop;
8194                         if (rec->refs)
8195                                 rec->refs -= refs_to_drop;
8196                 }
8197                 if (back->node.found_extent_tree) {
8198                         back->num_refs -= refs_to_drop;
8199                         if (rec->extent_item_refs)
8200                                 rec->extent_item_refs -= refs_to_drop;
8201                 }
8202                 if (back->found_ref == 0)
8203                         back->node.found_ref = 0;
8204                 if (back->num_refs == 0)
8205                         back->node.found_extent_tree = 0;
8206
8207                 if (!back->node.found_extent_tree && back->node.found_ref) {
8208                         rb_erase(&back->node.node, &rec->backref_tree);
8209                         free(back);
8210                 }
8211         } else {
8212                 struct tree_backref *back;
8213                 back = find_tree_backref(rec, parent, root_objectid);
8214                 if (!back)
8215                         goto out;
8216                 if (back->node.found_ref) {
8217                         if (rec->refs)
8218                                 rec->refs--;
8219                         back->node.found_ref = 0;
8220                 }
8221                 if (back->node.found_extent_tree) {
8222                         if (rec->extent_item_refs)
8223                                 rec->extent_item_refs--;
8224                         back->node.found_extent_tree = 0;
8225                 }
8226                 if (!back->node.found_extent_tree && back->node.found_ref) {
8227                         rb_erase(&back->node.node, &rec->backref_tree);
8228                         free(back);
8229                 }
8230         }
8231         maybe_free_extent_rec(extent_cache, rec);
8232 out:
8233         return 0;
8234 }
8235
8236 static int delete_extent_records(struct btrfs_trans_handle *trans,
8237                                  struct btrfs_root *root,
8238                                  struct btrfs_path *path,
8239                                  u64 bytenr)
8240 {
8241         struct btrfs_key key;
8242         struct btrfs_key found_key;
8243         struct extent_buffer *leaf;
8244         int ret;
8245         int slot;
8246
8247
8248         key.objectid = bytenr;
8249         key.type = (u8)-1;
8250         key.offset = (u64)-1;
8251
8252         while(1) {
8253                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8254                                         &key, path, 0, 1);
8255                 if (ret < 0)
8256                         break;
8257
8258                 if (ret > 0) {
8259                         ret = 0;
8260                         if (path->slots[0] == 0)
8261                                 break;
8262                         path->slots[0]--;
8263                 }
8264                 ret = 0;
8265
8266                 leaf = path->nodes[0];
8267                 slot = path->slots[0];
8268
8269                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8270                 if (found_key.objectid != bytenr)
8271                         break;
8272
8273                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8274                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8275                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8276                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8277                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8278                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8279                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8280                         btrfs_release_path(path);
8281                         if (found_key.type == 0) {
8282                                 if (found_key.offset == 0)
8283                                         break;
8284                                 key.offset = found_key.offset - 1;
8285                                 key.type = found_key.type;
8286                         }
8287                         key.type = found_key.type - 1;
8288                         key.offset = (u64)-1;
8289                         continue;
8290                 }
8291
8292                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8293                         found_key.objectid, found_key.type, found_key.offset);
8294
8295                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8296                 if (ret)
8297                         break;
8298                 btrfs_release_path(path);
8299
8300                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8301                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8302                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8303                                 found_key.offset : root->fs_info->nodesize;
8304
8305                         ret = btrfs_update_block_group(trans, root, bytenr,
8306                                                        bytes, 0, 0);
8307                         if (ret)
8308                                 break;
8309                 }
8310         }
8311
8312         btrfs_release_path(path);
8313         return ret;
8314 }
8315
8316 /*
8317  * for a single backref, this will allocate a new extent
8318  * and add the backref to it.
8319  */
8320 static int record_extent(struct btrfs_trans_handle *trans,
8321                          struct btrfs_fs_info *info,
8322                          struct btrfs_path *path,
8323                          struct extent_record *rec,
8324                          struct extent_backref *back,
8325                          int allocated, u64 flags)
8326 {
8327         int ret = 0;
8328         struct btrfs_root *extent_root = info->extent_root;
8329         struct extent_buffer *leaf;
8330         struct btrfs_key ins_key;
8331         struct btrfs_extent_item *ei;
8332         struct data_backref *dback;
8333         struct btrfs_tree_block_info *bi;
8334
8335         if (!back->is_data)
8336                 rec->max_size = max_t(u64, rec->max_size,
8337                                     info->nodesize);
8338
8339         if (!allocated) {
8340                 u32 item_size = sizeof(*ei);
8341
8342                 if (!back->is_data)
8343                         item_size += sizeof(*bi);
8344
8345                 ins_key.objectid = rec->start;
8346                 ins_key.offset = rec->max_size;
8347                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8348
8349                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8350                                         &ins_key, item_size);
8351                 if (ret)
8352                         goto fail;
8353
8354                 leaf = path->nodes[0];
8355                 ei = btrfs_item_ptr(leaf, path->slots[0],
8356                                     struct btrfs_extent_item);
8357
8358                 btrfs_set_extent_refs(leaf, ei, 0);
8359                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8360
8361                 if (back->is_data) {
8362                         btrfs_set_extent_flags(leaf, ei,
8363                                                BTRFS_EXTENT_FLAG_DATA);
8364                 } else {
8365                         struct btrfs_disk_key copy_key;;
8366
8367                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8368                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8369                                              sizeof(*bi));
8370
8371                         btrfs_set_disk_key_objectid(&copy_key,
8372                                                     rec->info_objectid);
8373                         btrfs_set_disk_key_type(&copy_key, 0);
8374                         btrfs_set_disk_key_offset(&copy_key, 0);
8375
8376                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8377                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8378
8379                         btrfs_set_extent_flags(leaf, ei,
8380                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8381                 }
8382
8383                 btrfs_mark_buffer_dirty(leaf);
8384                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8385                                                rec->max_size, 1, 0);
8386                 if (ret)
8387                         goto fail;
8388                 btrfs_release_path(path);
8389         }
8390
8391         if (back->is_data) {
8392                 u64 parent;
8393                 int i;
8394
8395                 dback = to_data_backref(back);
8396                 if (back->full_backref)
8397                         parent = dback->parent;
8398                 else
8399                         parent = 0;
8400
8401                 for (i = 0; i < dback->found_ref; i++) {
8402                         /* if parent != 0, we're doing a full backref
8403                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8404                          * just makes the backref allocator create a data
8405                          * backref
8406                          */
8407                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8408                                                    rec->start, rec->max_size,
8409                                                    parent,
8410                                                    dback->root,
8411                                                    parent ?
8412                                                    BTRFS_FIRST_FREE_OBJECTID :
8413                                                    dback->owner,
8414                                                    dback->offset);
8415                         if (ret)
8416                                 break;
8417                 }
8418                 fprintf(stderr, "adding new data backref"
8419                                 " on %llu %s %llu owner %llu"
8420                                 " offset %llu found %d\n",
8421                                 (unsigned long long)rec->start,
8422                                 back->full_backref ?
8423                                 "parent" : "root",
8424                                 back->full_backref ?
8425                                 (unsigned long long)parent :
8426                                 (unsigned long long)dback->root,
8427                                 (unsigned long long)dback->owner,
8428                                 (unsigned long long)dback->offset,
8429                                 dback->found_ref);
8430         } else {
8431                 u64 parent;
8432                 struct tree_backref *tback;
8433
8434                 tback = to_tree_backref(back);
8435                 if (back->full_backref)
8436                         parent = tback->parent;
8437                 else
8438                         parent = 0;
8439
8440                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8441                                            rec->start, rec->max_size,
8442                                            parent, tback->root, 0, 0);
8443                 fprintf(stderr, "adding new tree backref on "
8444                         "start %llu len %llu parent %llu root %llu\n",
8445                         rec->start, rec->max_size, parent, tback->root);
8446         }
8447 fail:
8448         btrfs_release_path(path);
8449         return ret;
8450 }
8451
8452 static struct extent_entry *find_entry(struct list_head *entries,
8453                                        u64 bytenr, u64 bytes)
8454 {
8455         struct extent_entry *entry = NULL;
8456
8457         list_for_each_entry(entry, entries, list) {
8458                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8459                         return entry;
8460         }
8461
8462         return NULL;
8463 }
8464
8465 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8466 {
8467         struct extent_entry *entry, *best = NULL, *prev = NULL;
8468
8469         list_for_each_entry(entry, entries, list) {
8470                 /*
8471                  * If there are as many broken entries as entries then we know
8472                  * not to trust this particular entry.
8473                  */
8474                 if (entry->broken == entry->count)
8475                         continue;
8476
8477                 /*
8478                  * Special case, when there are only two entries and 'best' is
8479                  * the first one
8480                  */
8481                 if (!prev) {
8482                         best = entry;
8483                         prev = entry;
8484                         continue;
8485                 }
8486
8487                 /*
8488                  * If our current entry == best then we can't be sure our best
8489                  * is really the best, so we need to keep searching.
8490                  */
8491                 if (best && best->count == entry->count) {
8492                         prev = entry;
8493                         best = NULL;
8494                         continue;
8495                 }
8496
8497                 /* Prev == entry, not good enough, have to keep searching */
8498                 if (!prev->broken && prev->count == entry->count)
8499                         continue;
8500
8501                 if (!best)
8502                         best = (prev->count > entry->count) ? prev : entry;
8503                 else if (best->count < entry->count)
8504                         best = entry;
8505                 prev = entry;
8506         }
8507
8508         return best;
8509 }
8510
8511 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8512                       struct data_backref *dback, struct extent_entry *entry)
8513 {
8514         struct btrfs_trans_handle *trans;
8515         struct btrfs_root *root;
8516         struct btrfs_file_extent_item *fi;
8517         struct extent_buffer *leaf;
8518         struct btrfs_key key;
8519         u64 bytenr, bytes;
8520         int ret, err;
8521
8522         key.objectid = dback->root;
8523         key.type = BTRFS_ROOT_ITEM_KEY;
8524         key.offset = (u64)-1;
8525         root = btrfs_read_fs_root(info, &key);
8526         if (IS_ERR(root)) {
8527                 fprintf(stderr, "Couldn't find root for our ref\n");
8528                 return -EINVAL;
8529         }
8530
8531         /*
8532          * The backref points to the original offset of the extent if it was
8533          * split, so we need to search down to the offset we have and then walk
8534          * forward until we find the backref we're looking for.
8535          */
8536         key.objectid = dback->owner;
8537         key.type = BTRFS_EXTENT_DATA_KEY;
8538         key.offset = dback->offset;
8539         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8540         if (ret < 0) {
8541                 fprintf(stderr, "Error looking up ref %d\n", ret);
8542                 return ret;
8543         }
8544
8545         while (1) {
8546                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8547                         ret = btrfs_next_leaf(root, path);
8548                         if (ret) {
8549                                 fprintf(stderr, "Couldn't find our ref, next\n");
8550                                 return -EINVAL;
8551                         }
8552                 }
8553                 leaf = path->nodes[0];
8554                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8555                 if (key.objectid != dback->owner ||
8556                     key.type != BTRFS_EXTENT_DATA_KEY) {
8557                         fprintf(stderr, "Couldn't find our ref, search\n");
8558                         return -EINVAL;
8559                 }
8560                 fi = btrfs_item_ptr(leaf, path->slots[0],
8561                                     struct btrfs_file_extent_item);
8562                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8563                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8564
8565                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8566                         break;
8567                 path->slots[0]++;
8568         }
8569
8570         btrfs_release_path(path);
8571
8572         trans = btrfs_start_transaction(root, 1);
8573         if (IS_ERR(trans))
8574                 return PTR_ERR(trans);
8575
8576         /*
8577          * Ok we have the key of the file extent we want to fix, now we can cow
8578          * down to the thing and fix it.
8579          */
8580         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8581         if (ret < 0) {
8582                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8583                         key.objectid, key.type, key.offset, ret);
8584                 goto out;
8585         }
8586         if (ret > 0) {
8587                 fprintf(stderr, "Well that's odd, we just found this key "
8588                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8589                         key.offset);
8590                 ret = -EINVAL;
8591                 goto out;
8592         }
8593         leaf = path->nodes[0];
8594         fi = btrfs_item_ptr(leaf, path->slots[0],
8595                             struct btrfs_file_extent_item);
8596
8597         if (btrfs_file_extent_compression(leaf, fi) &&
8598             dback->disk_bytenr != entry->bytenr) {
8599                 fprintf(stderr, "Ref doesn't match the record start and is "
8600                         "compressed, please take a btrfs-image of this file "
8601                         "system and send it to a btrfs developer so they can "
8602                         "complete this functionality for bytenr %Lu\n",
8603                         dback->disk_bytenr);
8604                 ret = -EINVAL;
8605                 goto out;
8606         }
8607
8608         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8609                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8610         } else if (dback->disk_bytenr > entry->bytenr) {
8611                 u64 off_diff, offset;
8612
8613                 off_diff = dback->disk_bytenr - entry->bytenr;
8614                 offset = btrfs_file_extent_offset(leaf, fi);
8615                 if (dback->disk_bytenr + offset +
8616                     btrfs_file_extent_num_bytes(leaf, fi) >
8617                     entry->bytenr + entry->bytes) {
8618                         fprintf(stderr, "Ref is past the entry end, please "
8619                                 "take a btrfs-image of this file system and "
8620                                 "send it to a btrfs developer, ref %Lu\n",
8621                                 dback->disk_bytenr);
8622                         ret = -EINVAL;
8623                         goto out;
8624                 }
8625                 offset += off_diff;
8626                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8627                 btrfs_set_file_extent_offset(leaf, fi, offset);
8628         } else if (dback->disk_bytenr < entry->bytenr) {
8629                 u64 offset;
8630
8631                 offset = btrfs_file_extent_offset(leaf, fi);
8632                 if (dback->disk_bytenr + offset < entry->bytenr) {
8633                         fprintf(stderr, "Ref is before the entry start, please"
8634                                 " take a btrfs-image of this file system and "
8635                                 "send it to a btrfs developer, ref %Lu\n",
8636                                 dback->disk_bytenr);
8637                         ret = -EINVAL;
8638                         goto out;
8639                 }
8640
8641                 offset += dback->disk_bytenr;
8642                 offset -= entry->bytenr;
8643                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8644                 btrfs_set_file_extent_offset(leaf, fi, offset);
8645         }
8646
8647         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8648
8649         /*
8650          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8651          * only do this if we aren't using compression, otherwise it's a
8652          * trickier case.
8653          */
8654         if (!btrfs_file_extent_compression(leaf, fi))
8655                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8656         else
8657                 printf("ram bytes may be wrong?\n");
8658         btrfs_mark_buffer_dirty(leaf);
8659 out:
8660         err = btrfs_commit_transaction(trans, root);
8661         btrfs_release_path(path);
8662         return ret ? ret : err;
8663 }
8664
8665 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8666                            struct extent_record *rec)
8667 {
8668         struct extent_backref *back, *tmp;
8669         struct data_backref *dback;
8670         struct extent_entry *entry, *best = NULL;
8671         LIST_HEAD(entries);
8672         int nr_entries = 0;
8673         int broken_entries = 0;
8674         int ret = 0;
8675         short mismatch = 0;
8676
8677         /*
8678          * Metadata is easy and the backrefs should always agree on bytenr and
8679          * size, if not we've got bigger issues.
8680          */
8681         if (rec->metadata)
8682                 return 0;
8683
8684         rbtree_postorder_for_each_entry_safe(back, tmp,
8685                                              &rec->backref_tree, node) {
8686                 if (back->full_backref || !back->is_data)
8687                         continue;
8688
8689                 dback = to_data_backref(back);
8690
8691                 /*
8692                  * We only pay attention to backrefs that we found a real
8693                  * backref for.
8694                  */
8695                 if (dback->found_ref == 0)
8696                         continue;
8697
8698                 /*
8699                  * For now we only catch when the bytes don't match, not the
8700                  * bytenr.  We can easily do this at the same time, but I want
8701                  * to have a fs image to test on before we just add repair
8702                  * functionality willy-nilly so we know we won't screw up the
8703                  * repair.
8704                  */
8705
8706                 entry = find_entry(&entries, dback->disk_bytenr,
8707                                    dback->bytes);
8708                 if (!entry) {
8709                         entry = malloc(sizeof(struct extent_entry));
8710                         if (!entry) {
8711                                 ret = -ENOMEM;
8712                                 goto out;
8713                         }
8714                         memset(entry, 0, sizeof(*entry));
8715                         entry->bytenr = dback->disk_bytenr;
8716                         entry->bytes = dback->bytes;
8717                         list_add_tail(&entry->list, &entries);
8718                         nr_entries++;
8719                 }
8720
8721                 /*
8722                  * If we only have on entry we may think the entries agree when
8723                  * in reality they don't so we have to do some extra checking.
8724                  */
8725                 if (dback->disk_bytenr != rec->start ||
8726                     dback->bytes != rec->nr || back->broken)
8727                         mismatch = 1;
8728
8729                 if (back->broken) {
8730                         entry->broken++;
8731                         broken_entries++;
8732                 }
8733
8734                 entry->count++;
8735         }
8736
8737         /* Yay all the backrefs agree, carry on good sir */
8738         if (nr_entries <= 1 && !mismatch)
8739                 goto out;
8740
8741         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8742                 "%Lu\n", rec->start);
8743
8744         /*
8745          * First we want to see if the backrefs can agree amongst themselves who
8746          * is right, so figure out which one of the entries has the highest
8747          * count.
8748          */
8749         best = find_most_right_entry(&entries);
8750
8751         /*
8752          * Ok so we may have an even split between what the backrefs think, so
8753          * this is where we use the extent ref to see what it thinks.
8754          */
8755         if (!best) {
8756                 entry = find_entry(&entries, rec->start, rec->nr);
8757                 if (!entry && (!broken_entries || !rec->found_rec)) {
8758                         fprintf(stderr, "Backrefs don't agree with each other "
8759                                 "and extent record doesn't agree with anybody,"
8760                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8761                                 rec->start, rec->nr);
8762                         ret = -EINVAL;
8763                         goto out;
8764                 } else if (!entry) {
8765                         /*
8766                          * Ok our backrefs were broken, we'll assume this is the
8767                          * correct value and add an entry for this range.
8768                          */
8769                         entry = malloc(sizeof(struct extent_entry));
8770                         if (!entry) {
8771                                 ret = -ENOMEM;
8772                                 goto out;
8773                         }
8774                         memset(entry, 0, sizeof(*entry));
8775                         entry->bytenr = rec->start;
8776                         entry->bytes = rec->nr;
8777                         list_add_tail(&entry->list, &entries);
8778                         nr_entries++;
8779                 }
8780                 entry->count++;
8781                 best = find_most_right_entry(&entries);
8782                 if (!best) {
8783                         fprintf(stderr, "Backrefs and extent record evenly "
8784                                 "split on who is right, this is going to "
8785                                 "require user input to fix bytenr %Lu bytes "
8786                                 "%Lu\n", rec->start, rec->nr);
8787                         ret = -EINVAL;
8788                         goto out;
8789                 }
8790         }
8791
8792         /*
8793          * I don't think this can happen currently as we'll abort() if we catch
8794          * this case higher up, but in case somebody removes that we still can't
8795          * deal with it properly here yet, so just bail out of that's the case.
8796          */
8797         if (best->bytenr != rec->start) {
8798                 fprintf(stderr, "Extent start and backref starts don't match, "
8799                         "please use btrfs-image on this file system and send "
8800                         "it to a btrfs developer so they can make fsck fix "
8801                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8802                         rec->start, rec->nr);
8803                 ret = -EINVAL;
8804                 goto out;
8805         }
8806
8807         /*
8808          * Ok great we all agreed on an extent record, let's go find the real
8809          * references and fix up the ones that don't match.
8810          */
8811         rbtree_postorder_for_each_entry_safe(back, tmp,
8812                                              &rec->backref_tree, node) {
8813                 if (back->full_backref || !back->is_data)
8814                         continue;
8815
8816                 dback = to_data_backref(back);
8817
8818                 /*
8819                  * Still ignoring backrefs that don't have a real ref attached
8820                  * to them.
8821                  */
8822                 if (dback->found_ref == 0)
8823                         continue;
8824
8825                 if (dback->bytes == best->bytes &&
8826                     dback->disk_bytenr == best->bytenr)
8827                         continue;
8828
8829                 ret = repair_ref(info, path, dback, best);
8830                 if (ret)
8831                         goto out;
8832         }
8833
8834         /*
8835          * Ok we messed with the actual refs, which means we need to drop our
8836          * entire cache and go back and rescan.  I know this is a huge pain and
8837          * adds a lot of extra work, but it's the only way to be safe.  Once all
8838          * the backrefs agree we may not need to do anything to the extent
8839          * record itself.
8840          */
8841         ret = -EAGAIN;
8842 out:
8843         while (!list_empty(&entries)) {
8844                 entry = list_entry(entries.next, struct extent_entry, list);
8845                 list_del_init(&entry->list);
8846                 free(entry);
8847         }
8848         return ret;
8849 }
8850
8851 static int process_duplicates(struct cache_tree *extent_cache,
8852                               struct extent_record *rec)
8853 {
8854         struct extent_record *good, *tmp;
8855         struct cache_extent *cache;
8856         int ret;
8857
8858         /*
8859          * If we found a extent record for this extent then return, or if we
8860          * have more than one duplicate we are likely going to need to delete
8861          * something.
8862          */
8863         if (rec->found_rec || rec->num_duplicates > 1)
8864                 return 0;
8865
8866         /* Shouldn't happen but just in case */
8867         BUG_ON(!rec->num_duplicates);
8868
8869         /*
8870          * So this happens if we end up with a backref that doesn't match the
8871          * actual extent entry.  So either the backref is bad or the extent
8872          * entry is bad.  Either way we want to have the extent_record actually
8873          * reflect what we found in the extent_tree, so we need to take the
8874          * duplicate out and use that as the extent_record since the only way we
8875          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8876          */
8877         remove_cache_extent(extent_cache, &rec->cache);
8878
8879         good = to_extent_record(rec->dups.next);
8880         list_del_init(&good->list);
8881         INIT_LIST_HEAD(&good->backrefs);
8882         INIT_LIST_HEAD(&good->dups);
8883         good->cache.start = good->start;
8884         good->cache.size = good->nr;
8885         good->content_checked = 0;
8886         good->owner_ref_checked = 0;
8887         good->num_duplicates = 0;
8888         good->refs = rec->refs;
8889         list_splice_init(&rec->backrefs, &good->backrefs);
8890         while (1) {
8891                 cache = lookup_cache_extent(extent_cache, good->start,
8892                                             good->nr);
8893                 if (!cache)
8894                         break;
8895                 tmp = container_of(cache, struct extent_record, cache);
8896
8897                 /*
8898                  * If we find another overlapping extent and it's found_rec is
8899                  * set then it's a duplicate and we need to try and delete
8900                  * something.
8901                  */
8902                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8903                         if (list_empty(&good->list))
8904                                 list_add_tail(&good->list,
8905                                               &duplicate_extents);
8906                         good->num_duplicates += tmp->num_duplicates + 1;
8907                         list_splice_init(&tmp->dups, &good->dups);
8908                         list_del_init(&tmp->list);
8909                         list_add_tail(&tmp->list, &good->dups);
8910                         remove_cache_extent(extent_cache, &tmp->cache);
8911                         continue;
8912                 }
8913
8914                 /*
8915                  * Ok we have another non extent item backed extent rec, so lets
8916                  * just add it to this extent and carry on like we did above.
8917                  */
8918                 good->refs += tmp->refs;
8919                 list_splice_init(&tmp->backrefs, &good->backrefs);
8920                 remove_cache_extent(extent_cache, &tmp->cache);
8921                 free(tmp);
8922         }
8923         ret = insert_cache_extent(extent_cache, &good->cache);
8924         BUG_ON(ret);
8925         free(rec);
8926         return good->num_duplicates ? 0 : 1;
8927 }
8928
8929 static int delete_duplicate_records(struct btrfs_root *root,
8930                                     struct extent_record *rec)
8931 {
8932         struct btrfs_trans_handle *trans;
8933         LIST_HEAD(delete_list);
8934         struct btrfs_path path;
8935         struct extent_record *tmp, *good, *n;
8936         int nr_del = 0;
8937         int ret = 0, err;
8938         struct btrfs_key key;
8939
8940         btrfs_init_path(&path);
8941
8942         good = rec;
8943         /* Find the record that covers all of the duplicates. */
8944         list_for_each_entry(tmp, &rec->dups, list) {
8945                 if (good->start < tmp->start)
8946                         continue;
8947                 if (good->nr > tmp->nr)
8948                         continue;
8949
8950                 if (tmp->start + tmp->nr < good->start + good->nr) {
8951                         fprintf(stderr, "Ok we have overlapping extents that "
8952                                 "aren't completely covered by each other, this "
8953                                 "is going to require more careful thought.  "
8954                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8955                                 tmp->start, tmp->nr, good->start, good->nr);
8956                         abort();
8957                 }
8958                 good = tmp;
8959         }
8960
8961         if (good != rec)
8962                 list_add_tail(&rec->list, &delete_list);
8963
8964         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8965                 if (tmp == good)
8966                         continue;
8967                 list_move_tail(&tmp->list, &delete_list);
8968         }
8969
8970         root = root->fs_info->extent_root;
8971         trans = btrfs_start_transaction(root, 1);
8972         if (IS_ERR(trans)) {
8973                 ret = PTR_ERR(trans);
8974                 goto out;
8975         }
8976
8977         list_for_each_entry(tmp, &delete_list, list) {
8978                 if (tmp->found_rec == 0)
8979                         continue;
8980                 key.objectid = tmp->start;
8981                 key.type = BTRFS_EXTENT_ITEM_KEY;
8982                 key.offset = tmp->nr;
8983
8984                 /* Shouldn't happen but just in case */
8985                 if (tmp->metadata) {
8986                         fprintf(stderr, "Well this shouldn't happen, extent "
8987                                 "record overlaps but is metadata? "
8988                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8989                         abort();
8990                 }
8991
8992                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8993                 if (ret) {
8994                         if (ret > 0)
8995                                 ret = -EINVAL;
8996                         break;
8997                 }
8998                 ret = btrfs_del_item(trans, root, &path);
8999                 if (ret)
9000                         break;
9001                 btrfs_release_path(&path);
9002                 nr_del++;
9003         }
9004         err = btrfs_commit_transaction(trans, root);
9005         if (err && !ret)
9006                 ret = err;
9007 out:
9008         while (!list_empty(&delete_list)) {
9009                 tmp = to_extent_record(delete_list.next);
9010                 list_del_init(&tmp->list);
9011                 if (tmp == rec)
9012                         continue;
9013                 free(tmp);
9014         }
9015
9016         while (!list_empty(&rec->dups)) {
9017                 tmp = to_extent_record(rec->dups.next);
9018                 list_del_init(&tmp->list);
9019                 free(tmp);
9020         }
9021
9022         btrfs_release_path(&path);
9023
9024         if (!ret && !nr_del)
9025                 rec->num_duplicates = 0;
9026
9027         return ret ? ret : nr_del;
9028 }
9029
9030 static int find_possible_backrefs(struct btrfs_fs_info *info,
9031                                   struct btrfs_path *path,
9032                                   struct cache_tree *extent_cache,
9033                                   struct extent_record *rec)
9034 {
9035         struct btrfs_root *root;
9036         struct extent_backref *back, *tmp;
9037         struct data_backref *dback;
9038         struct cache_extent *cache;
9039         struct btrfs_file_extent_item *fi;
9040         struct btrfs_key key;
9041         u64 bytenr, bytes;
9042         int ret;
9043
9044         rbtree_postorder_for_each_entry_safe(back, tmp,
9045                                              &rec->backref_tree, node) {
9046                 /* Don't care about full backrefs (poor unloved backrefs) */
9047                 if (back->full_backref || !back->is_data)
9048                         continue;
9049
9050                 dback = to_data_backref(back);
9051
9052                 /* We found this one, we don't need to do a lookup */
9053                 if (dback->found_ref)
9054                         continue;
9055
9056                 key.objectid = dback->root;
9057                 key.type = BTRFS_ROOT_ITEM_KEY;
9058                 key.offset = (u64)-1;
9059
9060                 root = btrfs_read_fs_root(info, &key);
9061
9062                 /* No root, definitely a bad ref, skip */
9063                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9064                         continue;
9065                 /* Other err, exit */
9066                 if (IS_ERR(root))
9067                         return PTR_ERR(root);
9068
9069                 key.objectid = dback->owner;
9070                 key.type = BTRFS_EXTENT_DATA_KEY;
9071                 key.offset = dback->offset;
9072                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9073                 if (ret) {
9074                         btrfs_release_path(path);
9075                         if (ret < 0)
9076                                 return ret;
9077                         /* Didn't find it, we can carry on */
9078                         ret = 0;
9079                         continue;
9080                 }
9081
9082                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9083                                     struct btrfs_file_extent_item);
9084                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9085                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9086                 btrfs_release_path(path);
9087                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9088                 if (cache) {
9089                         struct extent_record *tmp;
9090                         tmp = container_of(cache, struct extent_record, cache);
9091
9092                         /*
9093                          * If we found an extent record for the bytenr for this
9094                          * particular backref then we can't add it to our
9095                          * current extent record.  We only want to add backrefs
9096                          * that don't have a corresponding extent item in the
9097                          * extent tree since they likely belong to this record
9098                          * and we need to fix it if it doesn't match bytenrs.
9099                          */
9100                         if  (tmp->found_rec)
9101                                 continue;
9102                 }
9103
9104                 dback->found_ref += 1;
9105                 dback->disk_bytenr = bytenr;
9106                 dback->bytes = bytes;
9107
9108                 /*
9109                  * Set this so the verify backref code knows not to trust the
9110                  * values in this backref.
9111                  */
9112                 back->broken = 1;
9113         }
9114
9115         return 0;
9116 }
9117
9118 /*
9119  * Record orphan data ref into corresponding root.
9120  *
9121  * Return 0 if the extent item contains data ref and recorded.
9122  * Return 1 if the extent item contains no useful data ref
9123  *   On that case, it may contains only shared_dataref or metadata backref
9124  *   or the file extent exists(this should be handled by the extent bytenr
9125  *   recovery routine)
9126  * Return <0 if something goes wrong.
9127  */
9128 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9129                                       struct extent_record *rec)
9130 {
9131         struct btrfs_key key;
9132         struct btrfs_root *dest_root;
9133         struct extent_backref *back, *tmp;
9134         struct data_backref *dback;
9135         struct orphan_data_extent *orphan;
9136         struct btrfs_path path;
9137         int recorded_data_ref = 0;
9138         int ret = 0;
9139
9140         if (rec->metadata)
9141                 return 1;
9142         btrfs_init_path(&path);
9143         rbtree_postorder_for_each_entry_safe(back, tmp,
9144                                              &rec->backref_tree, node) {
9145                 if (back->full_backref || !back->is_data ||
9146                     !back->found_extent_tree)
9147                         continue;
9148                 dback = to_data_backref(back);
9149                 if (dback->found_ref)
9150                         continue;
9151                 key.objectid = dback->root;
9152                 key.type = BTRFS_ROOT_ITEM_KEY;
9153                 key.offset = (u64)-1;
9154
9155                 dest_root = btrfs_read_fs_root(fs_info, &key);
9156
9157                 /* For non-exist root we just skip it */
9158                 if (IS_ERR(dest_root) || !dest_root)
9159                         continue;
9160
9161                 key.objectid = dback->owner;
9162                 key.type = BTRFS_EXTENT_DATA_KEY;
9163                 key.offset = dback->offset;
9164
9165                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9166                 btrfs_release_path(&path);
9167                 /*
9168                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9169                  * we need to record it for inode/file extent rebuild.
9170                  * For ret > 0, we record it only for file extent rebuild.
9171                  * For ret == 0, the file extent exists but only bytenr
9172                  * mismatch, let the original bytenr fix routine to handle,
9173                  * don't record it.
9174                  */
9175                 if (ret == 0)
9176                         continue;
9177                 ret = 0;
9178                 orphan = malloc(sizeof(*orphan));
9179                 if (!orphan) {
9180                         ret = -ENOMEM;
9181                         goto out;
9182                 }
9183                 INIT_LIST_HEAD(&orphan->list);
9184                 orphan->root = dback->root;
9185                 orphan->objectid = dback->owner;
9186                 orphan->offset = dback->offset;
9187                 orphan->disk_bytenr = rec->cache.start;
9188                 orphan->disk_len = rec->cache.size;
9189                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9190                 recorded_data_ref = 1;
9191         }
9192 out:
9193         btrfs_release_path(&path);
9194         if (!ret)
9195                 return !recorded_data_ref;
9196         else
9197                 return ret;
9198 }
9199
9200 /*
9201  * when an incorrect extent item is found, this will delete
9202  * all of the existing entries for it and recreate them
9203  * based on what the tree scan found.
9204  */
9205 static int fixup_extent_refs(struct btrfs_fs_info *info,
9206                              struct cache_tree *extent_cache,
9207                              struct extent_record *rec)
9208 {
9209         struct btrfs_trans_handle *trans = NULL;
9210         int ret;
9211         struct btrfs_path path;
9212         struct cache_extent *cache;
9213         struct extent_backref *back, *tmp;
9214         int allocated = 0;
9215         u64 flags = 0;
9216
9217         if (rec->flag_block_full_backref)
9218                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9219
9220         btrfs_init_path(&path);
9221         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9222                 /*
9223                  * Sometimes the backrefs themselves are so broken they don't
9224                  * get attached to any meaningful rec, so first go back and
9225                  * check any of our backrefs that we couldn't find and throw
9226                  * them into the list if we find the backref so that
9227                  * verify_backrefs can figure out what to do.
9228                  */
9229                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9230                 if (ret < 0)
9231                         goto out;
9232         }
9233
9234         /* step one, make sure all of the backrefs agree */
9235         ret = verify_backrefs(info, &path, rec);
9236         if (ret < 0)
9237                 goto out;
9238
9239         trans = btrfs_start_transaction(info->extent_root, 1);
9240         if (IS_ERR(trans)) {
9241                 ret = PTR_ERR(trans);
9242                 goto out;
9243         }
9244
9245         /* step two, delete all the existing records */
9246         ret = delete_extent_records(trans, info->extent_root, &path,
9247                                     rec->start);
9248
9249         if (ret < 0)
9250                 goto out;
9251
9252         /* was this block corrupt?  If so, don't add references to it */
9253         cache = lookup_cache_extent(info->corrupt_blocks,
9254                                     rec->start, rec->max_size);
9255         if (cache) {
9256                 ret = 0;
9257                 goto out;
9258         }
9259
9260         /* step three, recreate all the refs we did find */
9261         rbtree_postorder_for_each_entry_safe(back, tmp,
9262                                              &rec->backref_tree, node) {
9263                 /*
9264                  * if we didn't find any references, don't create a
9265                  * new extent record
9266                  */
9267                 if (!back->found_ref)
9268                         continue;
9269
9270                 rec->bad_full_backref = 0;
9271                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9272                 allocated = 1;
9273
9274                 if (ret)
9275                         goto out;
9276         }
9277 out:
9278         if (trans) {
9279                 int err = btrfs_commit_transaction(trans, info->extent_root);
9280                 if (!ret)
9281                         ret = err;
9282         }
9283
9284         if (!ret)
9285                 fprintf(stderr, "Repaired extent references for %llu\n",
9286                                 (unsigned long long)rec->start);
9287
9288         btrfs_release_path(&path);
9289         return ret;
9290 }
9291
9292 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9293                               struct extent_record *rec)
9294 {
9295         struct btrfs_trans_handle *trans;
9296         struct btrfs_root *root = fs_info->extent_root;
9297         struct btrfs_path path;
9298         struct btrfs_extent_item *ei;
9299         struct btrfs_key key;
9300         u64 flags;
9301         int ret = 0;
9302
9303         key.objectid = rec->start;
9304         if (rec->metadata) {
9305                 key.type = BTRFS_METADATA_ITEM_KEY;
9306                 key.offset = rec->info_level;
9307         } else {
9308                 key.type = BTRFS_EXTENT_ITEM_KEY;
9309                 key.offset = rec->max_size;
9310         }
9311
9312         trans = btrfs_start_transaction(root, 0);
9313         if (IS_ERR(trans))
9314                 return PTR_ERR(trans);
9315
9316         btrfs_init_path(&path);
9317         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9318         if (ret < 0) {
9319                 btrfs_release_path(&path);
9320                 btrfs_commit_transaction(trans, root);
9321                 return ret;
9322         } else if (ret) {
9323                 fprintf(stderr, "Didn't find extent for %llu\n",
9324                         (unsigned long long)rec->start);
9325                 btrfs_release_path(&path);
9326                 btrfs_commit_transaction(trans, root);
9327                 return -ENOENT;
9328         }
9329
9330         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9331                             struct btrfs_extent_item);
9332         flags = btrfs_extent_flags(path.nodes[0], ei);
9333         if (rec->flag_block_full_backref) {
9334                 fprintf(stderr, "setting full backref on %llu\n",
9335                         (unsigned long long)key.objectid);
9336                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9337         } else {
9338                 fprintf(stderr, "clearing full backref on %llu\n",
9339                         (unsigned long long)key.objectid);
9340                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9341         }
9342         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9343         btrfs_mark_buffer_dirty(path.nodes[0]);
9344         btrfs_release_path(&path);
9345         ret = btrfs_commit_transaction(trans, root);
9346         if (!ret)
9347                 fprintf(stderr, "Repaired extent flags for %llu\n",
9348                                 (unsigned long long)rec->start);
9349
9350         return ret;
9351 }
9352
9353 /* right now we only prune from the extent allocation tree */
9354 static int prune_one_block(struct btrfs_trans_handle *trans,
9355                            struct btrfs_fs_info *info,
9356                            struct btrfs_corrupt_block *corrupt)
9357 {
9358         int ret;
9359         struct btrfs_path path;
9360         struct extent_buffer *eb;
9361         u64 found;
9362         int slot;
9363         int nritems;
9364         int level = corrupt->level + 1;
9365
9366         btrfs_init_path(&path);
9367 again:
9368         /* we want to stop at the parent to our busted block */
9369         path.lowest_level = level;
9370
9371         ret = btrfs_search_slot(trans, info->extent_root,
9372                                 &corrupt->key, &path, -1, 1);
9373
9374         if (ret < 0)
9375                 goto out;
9376
9377         eb = path.nodes[level];
9378         if (!eb) {
9379                 ret = -ENOENT;
9380                 goto out;
9381         }
9382
9383         /*
9384          * hopefully the search gave us the block we want to prune,
9385          * lets try that first
9386          */
9387         slot = path.slots[level];
9388         found =  btrfs_node_blockptr(eb, slot);
9389         if (found == corrupt->cache.start)
9390                 goto del_ptr;
9391
9392         nritems = btrfs_header_nritems(eb);
9393
9394         /* the search failed, lets scan this node and hope we find it */
9395         for (slot = 0; slot < nritems; slot++) {
9396                 found =  btrfs_node_blockptr(eb, slot);
9397                 if (found == corrupt->cache.start)
9398                         goto del_ptr;
9399         }
9400         /*
9401          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9402          * to this block
9403          */
9404         if (eb == info->extent_root->node) {
9405                 ret = -ENOENT;
9406                 goto out;
9407         } else {
9408                 level++;
9409                 btrfs_release_path(&path);
9410                 goto again;
9411         }
9412
9413 del_ptr:
9414         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9415         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9416
9417 out:
9418         btrfs_release_path(&path);
9419         return ret;
9420 }
9421
9422 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9423 {
9424         struct btrfs_trans_handle *trans = NULL;
9425         struct cache_extent *cache;
9426         struct btrfs_corrupt_block *corrupt;
9427
9428         while (1) {
9429                 cache = search_cache_extent(info->corrupt_blocks, 0);
9430                 if (!cache)
9431                         break;
9432                 if (!trans) {
9433                         trans = btrfs_start_transaction(info->extent_root, 1);
9434                         if (IS_ERR(trans))
9435                                 return PTR_ERR(trans);
9436                 }
9437                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9438                 prune_one_block(trans, info, corrupt);
9439                 remove_cache_extent(info->corrupt_blocks, cache);
9440         }
9441         if (trans)
9442                 return btrfs_commit_transaction(trans, info->extent_root);
9443         return 0;
9444 }
9445
9446 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9447 {
9448         struct btrfs_block_group_cache *cache;
9449         u64 start, end;
9450         int ret;
9451
9452         while (1) {
9453                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9454                                             &start, &end, EXTENT_DIRTY);
9455                 if (ret)
9456                         break;
9457                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9458         }
9459
9460         start = 0;
9461         while (1) {
9462                 cache = btrfs_lookup_first_block_group(fs_info, start);
9463                 if (!cache)
9464                         break;
9465                 if (cache->cached)
9466                         cache->cached = 0;
9467                 start = cache->key.objectid + cache->key.offset;
9468         }
9469 }
9470
9471 static int check_extent_refs(struct btrfs_root *root,
9472                              struct cache_tree *extent_cache)
9473 {
9474         struct extent_record *rec;
9475         struct cache_extent *cache;
9476         int ret = 0;
9477         int had_dups = 0;
9478
9479         if (repair) {
9480                 /*
9481                  * if we're doing a repair, we have to make sure
9482                  * we don't allocate from the problem extents.
9483                  * In the worst case, this will be all the
9484                  * extents in the FS
9485                  */
9486                 cache = search_cache_extent(extent_cache, 0);
9487                 while(cache) {
9488                         rec = container_of(cache, struct extent_record, cache);
9489                         set_extent_dirty(root->fs_info->excluded_extents,
9490                                          rec->start,
9491                                          rec->start + rec->max_size - 1);
9492                         cache = next_cache_extent(cache);
9493                 }
9494
9495                 /* pin down all the corrupted blocks too */
9496                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9497                 while(cache) {
9498                         set_extent_dirty(root->fs_info->excluded_extents,
9499                                          cache->start,
9500                                          cache->start + cache->size - 1);
9501                         cache = next_cache_extent(cache);
9502                 }
9503                 prune_corrupt_blocks(root->fs_info);
9504                 reset_cached_block_groups(root->fs_info);
9505         }
9506
9507         reset_cached_block_groups(root->fs_info);
9508
9509         /*
9510          * We need to delete any duplicate entries we find first otherwise we
9511          * could mess up the extent tree when we have backrefs that actually
9512          * belong to a different extent item and not the weird duplicate one.
9513          */
9514         while (repair && !list_empty(&duplicate_extents)) {
9515                 rec = to_extent_record(duplicate_extents.next);
9516                 list_del_init(&rec->list);
9517
9518                 /* Sometimes we can find a backref before we find an actual
9519                  * extent, so we need to process it a little bit to see if there
9520                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9521                  * if this is a backref screwup.  If we need to delete stuff
9522                  * process_duplicates() will return 0, otherwise it will return
9523                  * 1 and we
9524                  */
9525                 if (process_duplicates(extent_cache, rec))
9526                         continue;
9527                 ret = delete_duplicate_records(root, rec);
9528                 if (ret < 0)
9529                         return ret;
9530                 /*
9531                  * delete_duplicate_records will return the number of entries
9532                  * deleted, so if it's greater than 0 then we know we actually
9533                  * did something and we need to remove.
9534                  */
9535                 if (ret)
9536                         had_dups = 1;
9537         }
9538
9539         if (had_dups)
9540                 return -EAGAIN;
9541
9542         while(1) {
9543                 int cur_err = 0;
9544                 int fix = 0;
9545
9546                 cache = search_cache_extent(extent_cache, 0);
9547                 if (!cache)
9548                         break;
9549                 rec = container_of(cache, struct extent_record, cache);
9550                 if (rec->num_duplicates) {
9551                         fprintf(stderr, "extent item %llu has multiple extent "
9552                                 "items\n", (unsigned long long)rec->start);
9553                         cur_err = 1;
9554                 }
9555
9556                 if (rec->refs != rec->extent_item_refs) {
9557                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9558                                 (unsigned long long)rec->start,
9559                                 (unsigned long long)rec->nr);
9560                         fprintf(stderr, "extent item %llu, found %llu\n",
9561                                 (unsigned long long)rec->extent_item_refs,
9562                                 (unsigned long long)rec->refs);
9563                         ret = record_orphan_data_extents(root->fs_info, rec);
9564                         if (ret < 0)
9565                                 goto repair_abort;
9566                         fix = ret;
9567                         cur_err = 1;
9568                 }
9569                 if (all_backpointers_checked(rec, 1)) {
9570                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9571                                 (unsigned long long)rec->start,
9572                                 (unsigned long long)rec->nr);
9573                         fix = 1;
9574                         cur_err = 1;
9575                 }
9576                 if (!rec->owner_ref_checked) {
9577                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9578                                 (unsigned long long)rec->start,
9579                                 (unsigned long long)rec->nr);
9580                         fix = 1;
9581                         cur_err = 1;
9582                 }
9583
9584                 if (repair && fix) {
9585                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9586                         if (ret)
9587                                 goto repair_abort;
9588                 }
9589
9590
9591                 if (rec->bad_full_backref) {
9592                         fprintf(stderr, "bad full backref, on [%llu]\n",
9593                                 (unsigned long long)rec->start);
9594                         if (repair) {
9595                                 ret = fixup_extent_flags(root->fs_info, rec);
9596                                 if (ret)
9597                                         goto repair_abort;
9598                                 fix = 1;
9599                         }
9600                         cur_err = 1;
9601                 }
9602                 /*
9603                  * Although it's not a extent ref's problem, we reuse this
9604                  * routine for error reporting.
9605                  * No repair function yet.
9606                  */
9607                 if (rec->crossing_stripes) {
9608                         fprintf(stderr,
9609                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9610                                 rec->start, rec->start + rec->max_size);
9611                         cur_err = 1;
9612                 }
9613
9614                 if (rec->wrong_chunk_type) {
9615                         fprintf(stderr,
9616                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9617                                 rec->start, rec->start + rec->max_size);
9618                         cur_err = 1;
9619                 }
9620
9621                 remove_cache_extent(extent_cache, cache);
9622                 free_all_extent_backrefs(rec);
9623                 if (!init_extent_tree && repair && (!cur_err || fix))
9624                         clear_extent_dirty(root->fs_info->excluded_extents,
9625                                            rec->start,
9626                                            rec->start + rec->max_size - 1);
9627                 free(rec);
9628         }
9629 repair_abort:
9630         if (repair) {
9631                 if (ret && ret != -EAGAIN) {
9632                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9633                         exit(1);
9634                 } else if (!ret) {
9635                         struct btrfs_trans_handle *trans;
9636
9637                         root = root->fs_info->extent_root;
9638                         trans = btrfs_start_transaction(root, 1);
9639                         if (IS_ERR(trans)) {
9640                                 ret = PTR_ERR(trans);
9641                                 goto repair_abort;
9642                         }
9643
9644                         ret = btrfs_fix_block_accounting(trans, root);
9645                         if (ret)
9646                                 goto repair_abort;
9647                         ret = btrfs_commit_transaction(trans, root);
9648                         if (ret)
9649                                 goto repair_abort;
9650                 }
9651                 return ret;
9652         }
9653         return 0;
9654 }
9655
9656 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9657 {
9658         u64 stripe_size;
9659
9660         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9661                 stripe_size = length;
9662                 stripe_size /= num_stripes;
9663         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9664                 stripe_size = length * 2;
9665                 stripe_size /= num_stripes;
9666         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9667                 stripe_size = length;
9668                 stripe_size /= (num_stripes - 1);
9669         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9670                 stripe_size = length;
9671                 stripe_size /= (num_stripes - 2);
9672         } else {
9673                 stripe_size = length;
9674         }
9675         return stripe_size;
9676 }
9677
9678 /*
9679  * Check the chunk with its block group/dev list ref:
9680  * Return 0 if all refs seems valid.
9681  * Return 1 if part of refs seems valid, need later check for rebuild ref
9682  * like missing block group and needs to search extent tree to rebuild them.
9683  * Return -1 if essential refs are missing and unable to rebuild.
9684  */
9685 static int check_chunk_refs(struct chunk_record *chunk_rec,
9686                             struct block_group_tree *block_group_cache,
9687                             struct device_extent_tree *dev_extent_cache,
9688                             int silent)
9689 {
9690         struct cache_extent *block_group_item;
9691         struct block_group_record *block_group_rec;
9692         struct cache_extent *dev_extent_item;
9693         struct device_extent_record *dev_extent_rec;
9694         u64 devid;
9695         u64 offset;
9696         u64 length;
9697         int metadump_v2 = 0;
9698         int i;
9699         int ret = 0;
9700
9701         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9702                                                chunk_rec->offset,
9703                                                chunk_rec->length);
9704         if (block_group_item) {
9705                 block_group_rec = container_of(block_group_item,
9706                                                struct block_group_record,
9707                                                cache);
9708                 if (chunk_rec->length != block_group_rec->offset ||
9709                     chunk_rec->offset != block_group_rec->objectid ||
9710                     (!metadump_v2 &&
9711                      chunk_rec->type_flags != block_group_rec->flags)) {
9712                         if (!silent)
9713                                 fprintf(stderr,
9714                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9715                                         chunk_rec->objectid,
9716                                         chunk_rec->type,
9717                                         chunk_rec->offset,
9718                                         chunk_rec->length,
9719                                         chunk_rec->offset,
9720                                         chunk_rec->type_flags,
9721                                         block_group_rec->objectid,
9722                                         block_group_rec->type,
9723                                         block_group_rec->offset,
9724                                         block_group_rec->offset,
9725                                         block_group_rec->objectid,
9726                                         block_group_rec->flags);
9727                         ret = -1;
9728                 } else {
9729                         list_del_init(&block_group_rec->list);
9730                         chunk_rec->bg_rec = block_group_rec;
9731                 }
9732         } else {
9733                 if (!silent)
9734                         fprintf(stderr,
9735                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9736                                 chunk_rec->objectid,
9737                                 chunk_rec->type,
9738                                 chunk_rec->offset,
9739                                 chunk_rec->length,
9740                                 chunk_rec->offset,
9741                                 chunk_rec->type_flags);
9742                 ret = 1;
9743         }
9744
9745         if (metadump_v2)
9746                 return ret;
9747
9748         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9749                                     chunk_rec->num_stripes);
9750         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9751                 devid = chunk_rec->stripes[i].devid;
9752                 offset = chunk_rec->stripes[i].offset;
9753                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9754                                                        devid, offset, length);
9755                 if (dev_extent_item) {
9756                         dev_extent_rec = container_of(dev_extent_item,
9757                                                 struct device_extent_record,
9758                                                 cache);
9759                         if (dev_extent_rec->objectid != devid ||
9760                             dev_extent_rec->offset != offset ||
9761                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9762                             dev_extent_rec->length != length) {
9763                                 if (!silent)
9764                                         fprintf(stderr,
9765                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9766                                                 chunk_rec->objectid,
9767                                                 chunk_rec->type,
9768                                                 chunk_rec->offset,
9769                                                 chunk_rec->stripes[i].devid,
9770                                                 chunk_rec->stripes[i].offset,
9771                                                 dev_extent_rec->objectid,
9772                                                 dev_extent_rec->offset,
9773                                                 dev_extent_rec->length);
9774                                 ret = -1;
9775                         } else {
9776                                 list_move(&dev_extent_rec->chunk_list,
9777                                           &chunk_rec->dextents);
9778                         }
9779                 } else {
9780                         if (!silent)
9781                                 fprintf(stderr,
9782                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9783                                         chunk_rec->objectid,
9784                                         chunk_rec->type,
9785                                         chunk_rec->offset,
9786                                         chunk_rec->stripes[i].devid,
9787                                         chunk_rec->stripes[i].offset);
9788                         ret = -1;
9789                 }
9790         }
9791         return ret;
9792 }
9793
9794 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9795 int check_chunks(struct cache_tree *chunk_cache,
9796                  struct block_group_tree *block_group_cache,
9797                  struct device_extent_tree *dev_extent_cache,
9798                  struct list_head *good, struct list_head *bad,
9799                  struct list_head *rebuild, int silent)
9800 {
9801         struct cache_extent *chunk_item;
9802         struct chunk_record *chunk_rec;
9803         struct block_group_record *bg_rec;
9804         struct device_extent_record *dext_rec;
9805         int err;
9806         int ret = 0;
9807
9808         chunk_item = first_cache_extent(chunk_cache);
9809         while (chunk_item) {
9810                 chunk_rec = container_of(chunk_item, struct chunk_record,
9811                                          cache);
9812                 err = check_chunk_refs(chunk_rec, block_group_cache,
9813                                        dev_extent_cache, silent);
9814                 if (err < 0)
9815                         ret = err;
9816                 if (err == 0 && good)
9817                         list_add_tail(&chunk_rec->list, good);
9818                 if (err > 0 && rebuild)
9819                         list_add_tail(&chunk_rec->list, rebuild);
9820                 if (err < 0 && bad)
9821                         list_add_tail(&chunk_rec->list, bad);
9822                 chunk_item = next_cache_extent(chunk_item);
9823         }
9824
9825         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9826                 if (!silent)
9827                         fprintf(stderr,
9828                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9829                                 bg_rec->objectid,
9830                                 bg_rec->offset,
9831                                 bg_rec->flags);
9832                 if (!ret)
9833                         ret = 1;
9834         }
9835
9836         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9837                             chunk_list) {
9838                 if (!silent)
9839                         fprintf(stderr,
9840                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9841                                 dext_rec->objectid,
9842                                 dext_rec->offset,
9843                                 dext_rec->length);
9844                 if (!ret)
9845                         ret = 1;
9846         }
9847         return ret;
9848 }
9849
9850
9851 static int check_device_used(struct device_record *dev_rec,
9852                              struct device_extent_tree *dext_cache)
9853 {
9854         struct cache_extent *cache;
9855         struct device_extent_record *dev_extent_rec;
9856         u64 total_byte = 0;
9857
9858         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9859         while (cache) {
9860                 dev_extent_rec = container_of(cache,
9861                                               struct device_extent_record,
9862                                               cache);
9863                 if (dev_extent_rec->objectid != dev_rec->devid)
9864                         break;
9865
9866                 list_del_init(&dev_extent_rec->device_list);
9867                 total_byte += dev_extent_rec->length;
9868                 cache = next_cache_extent(cache);
9869         }
9870
9871         if (total_byte != dev_rec->byte_used) {
9872                 fprintf(stderr,
9873                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9874                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9875                         dev_rec->type, dev_rec->offset);
9876                 return -1;
9877         } else {
9878                 return 0;
9879         }
9880 }
9881
9882 /* check btrfs_dev_item -> btrfs_dev_extent */
9883 static int check_devices(struct rb_root *dev_cache,
9884                          struct device_extent_tree *dev_extent_cache)
9885 {
9886         struct rb_node *dev_node;
9887         struct device_record *dev_rec;
9888         struct device_extent_record *dext_rec;
9889         int err;
9890         int ret = 0;
9891
9892         dev_node = rb_first(dev_cache);
9893         while (dev_node) {
9894                 dev_rec = container_of(dev_node, struct device_record, node);
9895                 err = check_device_used(dev_rec, dev_extent_cache);
9896                 if (err)
9897                         ret = err;
9898
9899                 dev_node = rb_next(dev_node);
9900         }
9901         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9902                             device_list) {
9903                 fprintf(stderr,
9904                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9905                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9906                 if (!ret)
9907                         ret = 1;
9908         }
9909         return ret;
9910 }
9911
9912 static int add_root_item_to_list(struct list_head *head,
9913                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9914                                   u8 level, u8 drop_level,
9915                                   struct btrfs_key *drop_key)
9916 {
9917
9918         struct root_item_record *ri_rec;
9919         ri_rec = malloc(sizeof(*ri_rec));
9920         if (!ri_rec)
9921                 return -ENOMEM;
9922         ri_rec->bytenr = bytenr;
9923         ri_rec->objectid = objectid;
9924         ri_rec->level = level;
9925         ri_rec->drop_level = drop_level;
9926         ri_rec->last_snapshot = last_snapshot;
9927         if (drop_key)
9928                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9929         list_add_tail(&ri_rec->list, head);
9930
9931         return 0;
9932 }
9933
9934 static void free_root_item_list(struct list_head *list)
9935 {
9936         struct root_item_record *ri_rec;
9937
9938         while (!list_empty(list)) {
9939                 ri_rec = list_first_entry(list, struct root_item_record,
9940                                           list);
9941                 list_del_init(&ri_rec->list);
9942                 free(ri_rec);
9943         }
9944 }
9945
9946 static int deal_root_from_list(struct list_head *list,
9947                                struct btrfs_root *root,
9948                                struct block_info *bits,
9949                                int bits_nr,
9950                                struct cache_tree *pending,
9951                                struct cache_tree *seen,
9952                                struct cache_tree *reada,
9953                                struct cache_tree *nodes,
9954                                struct cache_tree *extent_cache,
9955                                struct cache_tree *chunk_cache,
9956                                struct rb_root *dev_cache,
9957                                struct block_group_tree *block_group_cache,
9958                                struct device_extent_tree *dev_extent_cache)
9959 {
9960         int ret = 0;
9961         u64 last;
9962
9963         while (!list_empty(list)) {
9964                 struct root_item_record *rec;
9965                 struct extent_buffer *buf;
9966                 rec = list_entry(list->next,
9967                                  struct root_item_record, list);
9968                 last = 0;
9969                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
9970                 if (!extent_buffer_uptodate(buf)) {
9971                         free_extent_buffer(buf);
9972                         ret = -EIO;
9973                         break;
9974                 }
9975                 ret = add_root_to_pending(buf, extent_cache, pending,
9976                                     seen, nodes, rec->objectid);
9977                 if (ret < 0)
9978                         break;
9979                 /*
9980                  * To rebuild extent tree, we need deal with snapshot
9981                  * one by one, otherwise we deal with node firstly which
9982                  * can maximize readahead.
9983                  */
9984                 while (1) {
9985                         ret = run_next_block(root, bits, bits_nr, &last,
9986                                              pending, seen, reada, nodes,
9987                                              extent_cache, chunk_cache,
9988                                              dev_cache, block_group_cache,
9989                                              dev_extent_cache, rec);
9990                         if (ret != 0)
9991                                 break;
9992                 }
9993                 free_extent_buffer(buf);
9994                 list_del(&rec->list);
9995                 free(rec);
9996                 if (ret < 0)
9997                         break;
9998         }
9999         while (ret >= 0) {
10000                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10001                                      reada, nodes, extent_cache, chunk_cache,
10002                                      dev_cache, block_group_cache,
10003                                      dev_extent_cache, NULL);
10004                 if (ret != 0) {
10005                         if (ret > 0)
10006                                 ret = 0;
10007                         break;
10008                 }
10009         }
10010         return ret;
10011 }
10012
10013 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10014 {
10015         struct rb_root dev_cache;
10016         struct cache_tree chunk_cache;
10017         struct block_group_tree block_group_cache;
10018         struct device_extent_tree dev_extent_cache;
10019         struct cache_tree extent_cache;
10020         struct cache_tree seen;
10021         struct cache_tree pending;
10022         struct cache_tree reada;
10023         struct cache_tree nodes;
10024         struct extent_io_tree excluded_extents;
10025         struct cache_tree corrupt_blocks;
10026         struct btrfs_path path;
10027         struct btrfs_key key;
10028         struct btrfs_key found_key;
10029         int ret, err = 0;
10030         struct block_info *bits;
10031         int bits_nr;
10032         struct extent_buffer *leaf;
10033         int slot;
10034         struct btrfs_root_item ri;
10035         struct list_head dropping_trees;
10036         struct list_head normal_trees;
10037         struct btrfs_root *root1;
10038         struct btrfs_root *root;
10039         u64 objectid;
10040         u8 level;
10041
10042         root = fs_info->fs_root;
10043         dev_cache = RB_ROOT;
10044         cache_tree_init(&chunk_cache);
10045         block_group_tree_init(&block_group_cache);
10046         device_extent_tree_init(&dev_extent_cache);
10047
10048         cache_tree_init(&extent_cache);
10049         cache_tree_init(&seen);
10050         cache_tree_init(&pending);
10051         cache_tree_init(&nodes);
10052         cache_tree_init(&reada);
10053         cache_tree_init(&corrupt_blocks);
10054         extent_io_tree_init(&excluded_extents);
10055         INIT_LIST_HEAD(&dropping_trees);
10056         INIT_LIST_HEAD(&normal_trees);
10057
10058         if (repair) {
10059                 fs_info->excluded_extents = &excluded_extents;
10060                 fs_info->fsck_extent_cache = &extent_cache;
10061                 fs_info->free_extent_hook = free_extent_hook;
10062                 fs_info->corrupt_blocks = &corrupt_blocks;
10063         }
10064
10065         bits_nr = 1024;
10066         bits = malloc(bits_nr * sizeof(struct block_info));
10067         if (!bits) {
10068                 perror("malloc");
10069                 exit(1);
10070         }
10071
10072         if (ctx.progress_enabled) {
10073                 ctx.tp = TASK_EXTENTS;
10074                 task_start(ctx.info);
10075         }
10076
10077 again:
10078         root1 = fs_info->tree_root;
10079         level = btrfs_header_level(root1->node);
10080         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10081                                     root1->node->start, 0, level, 0, NULL);
10082         if (ret < 0)
10083                 goto out;
10084         root1 = fs_info->chunk_root;
10085         level = btrfs_header_level(root1->node);
10086         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10087                                     root1->node->start, 0, level, 0, NULL);
10088         if (ret < 0)
10089                 goto out;
10090         btrfs_init_path(&path);
10091         key.offset = 0;
10092         key.objectid = 0;
10093         key.type = BTRFS_ROOT_ITEM_KEY;
10094         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10095         if (ret < 0)
10096                 goto out;
10097         while(1) {
10098                 leaf = path.nodes[0];
10099                 slot = path.slots[0];
10100                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10101                         ret = btrfs_next_leaf(root, &path);
10102                         if (ret != 0)
10103                                 break;
10104                         leaf = path.nodes[0];
10105                         slot = path.slots[0];
10106                 }
10107                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10108                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10109                         unsigned long offset;
10110                         u64 last_snapshot;
10111
10112                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10113                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10114                         last_snapshot = btrfs_root_last_snapshot(&ri);
10115                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10116                                 level = btrfs_root_level(&ri);
10117                                 ret = add_root_item_to_list(&normal_trees,
10118                                                 found_key.objectid,
10119                                                 btrfs_root_bytenr(&ri),
10120                                                 last_snapshot, level,
10121                                                 0, NULL);
10122                                 if (ret < 0)
10123                                         goto out;
10124                         } else {
10125                                 level = btrfs_root_level(&ri);
10126                                 objectid = found_key.objectid;
10127                                 btrfs_disk_key_to_cpu(&found_key,
10128                                                       &ri.drop_progress);
10129                                 ret = add_root_item_to_list(&dropping_trees,
10130                                                 objectid,
10131                                                 btrfs_root_bytenr(&ri),
10132                                                 last_snapshot, level,
10133                                                 ri.drop_level, &found_key);
10134                                 if (ret < 0)
10135                                         goto out;
10136                         }
10137                 }
10138                 path.slots[0]++;
10139         }
10140         btrfs_release_path(&path);
10141
10142         /*
10143          * check_block can return -EAGAIN if it fixes something, please keep
10144          * this in mind when dealing with return values from these functions, if
10145          * we get -EAGAIN we want to fall through and restart the loop.
10146          */
10147         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10148                                   &seen, &reada, &nodes, &extent_cache,
10149                                   &chunk_cache, &dev_cache, &block_group_cache,
10150                                   &dev_extent_cache);
10151         if (ret < 0) {
10152                 if (ret == -EAGAIN)
10153                         goto loop;
10154                 goto out;
10155         }
10156         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10157                                   &pending, &seen, &reada, &nodes,
10158                                   &extent_cache, &chunk_cache, &dev_cache,
10159                                   &block_group_cache, &dev_extent_cache);
10160         if (ret < 0) {
10161                 if (ret == -EAGAIN)
10162                         goto loop;
10163                 goto out;
10164         }
10165
10166         ret = check_chunks(&chunk_cache, &block_group_cache,
10167                            &dev_extent_cache, NULL, NULL, NULL, 0);
10168         if (ret) {
10169                 if (ret == -EAGAIN)
10170                         goto loop;
10171                 err = ret;
10172         }
10173
10174         ret = check_extent_refs(root, &extent_cache);
10175         if (ret < 0) {
10176                 if (ret == -EAGAIN)
10177                         goto loop;
10178                 goto out;
10179         }
10180
10181         ret = check_devices(&dev_cache, &dev_extent_cache);
10182         if (ret && err)
10183                 ret = err;
10184
10185 out:
10186         task_stop(ctx.info);
10187         if (repair) {
10188                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10189                 extent_io_tree_cleanup(&excluded_extents);
10190                 fs_info->fsck_extent_cache = NULL;
10191                 fs_info->free_extent_hook = NULL;
10192                 fs_info->corrupt_blocks = NULL;
10193                 fs_info->excluded_extents = NULL;
10194         }
10195         free(bits);
10196         free_chunk_cache_tree(&chunk_cache);
10197         free_device_cache_tree(&dev_cache);
10198         free_block_group_tree(&block_group_cache);
10199         free_device_extent_tree(&dev_extent_cache);
10200         free_extent_cache_tree(&seen);
10201         free_extent_cache_tree(&pending);
10202         free_extent_cache_tree(&reada);
10203         free_extent_cache_tree(&nodes);
10204         free_root_item_list(&normal_trees);
10205         free_root_item_list(&dropping_trees);
10206         return ret;
10207 loop:
10208         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10209         free_extent_cache_tree(&seen);
10210         free_extent_cache_tree(&pending);
10211         free_extent_cache_tree(&reada);
10212         free_extent_cache_tree(&nodes);
10213         free_chunk_cache_tree(&chunk_cache);
10214         free_block_group_tree(&block_group_cache);
10215         free_device_cache_tree(&dev_cache);
10216         free_device_extent_tree(&dev_extent_cache);
10217         free_extent_record_cache(&extent_cache);
10218         free_root_item_list(&normal_trees);
10219         free_root_item_list(&dropping_trees);
10220         extent_io_tree_cleanup(&excluded_extents);
10221         goto again;
10222 }
10223
10224 /*
10225  * Check backrefs of a tree block given by @bytenr or @eb.
10226  *
10227  * @root:       the root containing the @bytenr or @eb
10228  * @eb:         tree block extent buffer, can be NULL
10229  * @bytenr:     bytenr of the tree block to search
10230  * @level:      tree level of the tree block
10231  * @owner:      owner of the tree block
10232  *
10233  * Return >0 for any error found and output error message
10234  * Return 0 for no error found
10235  */
10236 static int check_tree_block_ref(struct btrfs_root *root,
10237                                 struct extent_buffer *eb, u64 bytenr,
10238                                 int level, u64 owner)
10239 {
10240         struct btrfs_key key;
10241         struct btrfs_root *extent_root = root->fs_info->extent_root;
10242         struct btrfs_path path;
10243         struct btrfs_extent_item *ei;
10244         struct btrfs_extent_inline_ref *iref;
10245         struct extent_buffer *leaf;
10246         unsigned long end;
10247         unsigned long ptr;
10248         int slot;
10249         int skinny_level;
10250         int type;
10251         u32 nodesize = root->fs_info->nodesize;
10252         u32 item_size;
10253         u64 offset;
10254         int tree_reloc_root = 0;
10255         int found_ref = 0;
10256         int err = 0;
10257         int ret;
10258
10259         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10260             btrfs_header_bytenr(root->node) == bytenr)
10261                 tree_reloc_root = 1;
10262
10263         btrfs_init_path(&path);
10264         key.objectid = bytenr;
10265         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10266                 key.type = BTRFS_METADATA_ITEM_KEY;
10267         else
10268                 key.type = BTRFS_EXTENT_ITEM_KEY;
10269         key.offset = (u64)-1;
10270
10271         /* Search for the backref in extent tree */
10272         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10273         if (ret < 0) {
10274                 err |= BACKREF_MISSING;
10275                 goto out;
10276         }
10277         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10278         if (ret) {
10279                 err |= BACKREF_MISSING;
10280                 goto out;
10281         }
10282
10283         leaf = path.nodes[0];
10284         slot = path.slots[0];
10285         btrfs_item_key_to_cpu(leaf, &key, slot);
10286
10287         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10288
10289         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10290                 skinny_level = (int)key.offset;
10291                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10292         } else {
10293                 struct btrfs_tree_block_info *info;
10294
10295                 info = (struct btrfs_tree_block_info *)(ei + 1);
10296                 skinny_level = btrfs_tree_block_level(leaf, info);
10297                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10298         }
10299
10300         if (eb) {
10301                 u64 header_gen;
10302                 u64 extent_gen;
10303
10304                 if (!(btrfs_extent_flags(leaf, ei) &
10305                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10306                         error(
10307                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10308                                 key.objectid, nodesize,
10309                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10310                         err = BACKREF_MISMATCH;
10311                 }
10312                 header_gen = btrfs_header_generation(eb);
10313                 extent_gen = btrfs_extent_generation(leaf, ei);
10314                 if (header_gen != extent_gen) {
10315                         error(
10316         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10317                                 key.objectid, nodesize, header_gen,
10318                                 extent_gen);
10319                         err = BACKREF_MISMATCH;
10320                 }
10321                 if (level != skinny_level) {
10322                         error(
10323                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10324                                 key.objectid, nodesize, level, skinny_level);
10325                         err = BACKREF_MISMATCH;
10326                 }
10327                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10328                         error(
10329                         "extent[%llu %u] is referred by other roots than %llu",
10330                                 key.objectid, nodesize, root->objectid);
10331                         err = BACKREF_MISMATCH;
10332                 }
10333         }
10334
10335         /*
10336          * Iterate the extent/metadata item to find the exact backref
10337          */
10338         item_size = btrfs_item_size_nr(leaf, slot);
10339         ptr = (unsigned long)iref;
10340         end = (unsigned long)ei + item_size;
10341         while (ptr < end) {
10342                 iref = (struct btrfs_extent_inline_ref *)ptr;
10343                 type = btrfs_extent_inline_ref_type(leaf, iref);
10344                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10345
10346                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10347                         (offset == root->objectid || offset == owner)) {
10348                         found_ref = 1;
10349                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10350                         /*
10351                          * Backref of tree reloc root points to itself, no need
10352                          * to check backref any more.
10353                          */
10354                         if (tree_reloc_root)
10355                                 found_ref = 1;
10356                         else
10357                         /* Check if the backref points to valid referencer */
10358                                 found_ref = !check_tree_block_ref(root, NULL,
10359                                                 offset, level + 1, owner);
10360                 }
10361
10362                 if (found_ref)
10363                         break;
10364                 ptr += btrfs_extent_inline_ref_size(type);
10365         }
10366
10367         /*
10368          * Inlined extent item doesn't have what we need, check
10369          * TREE_BLOCK_REF_KEY
10370          */
10371         if (!found_ref) {
10372                 btrfs_release_path(&path);
10373                 key.objectid = bytenr;
10374                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10375                 key.offset = root->objectid;
10376
10377                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10378                 if (!ret)
10379                         found_ref = 1;
10380         }
10381         if (!found_ref)
10382                 err |= BACKREF_MISSING;
10383 out:
10384         btrfs_release_path(&path);
10385         if (eb && (err & BACKREF_MISSING))
10386                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10387                         bytenr, nodesize, owner, level);
10388         return err;
10389 }
10390
10391 /*
10392  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10393  *
10394  * Return >0 any error found and output error message
10395  * Return 0 for no error found
10396  */
10397 static int check_extent_data_item(struct btrfs_root *root,
10398                                   struct extent_buffer *eb, int slot)
10399 {
10400         struct btrfs_file_extent_item *fi;
10401         struct btrfs_path path;
10402         struct btrfs_root *extent_root = root->fs_info->extent_root;
10403         struct btrfs_key fi_key;
10404         struct btrfs_key dbref_key;
10405         struct extent_buffer *leaf;
10406         struct btrfs_extent_item *ei;
10407         struct btrfs_extent_inline_ref *iref;
10408         struct btrfs_extent_data_ref *dref;
10409         u64 owner;
10410         u64 disk_bytenr;
10411         u64 disk_num_bytes;
10412         u64 extent_num_bytes;
10413         u64 extent_flags;
10414         u32 item_size;
10415         unsigned long end;
10416         unsigned long ptr;
10417         int type;
10418         u64 ref_root;
10419         int found_dbackref = 0;
10420         int err = 0;
10421         int ret;
10422
10423         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10424         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10425
10426         /* Nothing to check for hole and inline data extents */
10427         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10428             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10429                 return 0;
10430
10431         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10432         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10433         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10434
10435         /* Check unaligned disk_num_bytes and num_bytes */
10436         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10437                 error(
10438 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10439                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10440                         root->fs_info->sectorsize);
10441                 err |= BYTES_UNALIGNED;
10442         } else {
10443                 data_bytes_allocated += disk_num_bytes;
10444         }
10445         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10446                 error(
10447 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10448                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10449                         root->fs_info->sectorsize);
10450                 err |= BYTES_UNALIGNED;
10451         } else {
10452                 data_bytes_referenced += extent_num_bytes;
10453         }
10454         owner = btrfs_header_owner(eb);
10455
10456         /* Check the extent item of the file extent in extent tree */
10457         btrfs_init_path(&path);
10458         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10459         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10460         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10461
10462         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10463         if (ret)
10464                 goto out;
10465
10466         leaf = path.nodes[0];
10467         slot = path.slots[0];
10468         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10469
10470         extent_flags = btrfs_extent_flags(leaf, ei);
10471
10472         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10473                 error(
10474                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10475                     disk_bytenr, disk_num_bytes,
10476                     BTRFS_EXTENT_FLAG_DATA);
10477                 err |= BACKREF_MISMATCH;
10478         }
10479
10480         /* Check data backref inside that extent item */
10481         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10482         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10483         ptr = (unsigned long)iref;
10484         end = (unsigned long)ei + item_size;
10485         while (ptr < end) {
10486                 iref = (struct btrfs_extent_inline_ref *)ptr;
10487                 type = btrfs_extent_inline_ref_type(leaf, iref);
10488                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10489
10490                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10491                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10492                         if (ref_root == owner || ref_root == root->objectid)
10493                                 found_dbackref = 1;
10494                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10495                         found_dbackref = !check_tree_block_ref(root, NULL,
10496                                 btrfs_extent_inline_ref_offset(leaf, iref),
10497                                 0, owner);
10498                 }
10499
10500                 if (found_dbackref)
10501                         break;
10502                 ptr += btrfs_extent_inline_ref_size(type);
10503         }
10504
10505         if (!found_dbackref) {
10506                 btrfs_release_path(&path);
10507
10508                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10509                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10510                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10511                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10512                                 fi_key.objectid, fi_key.offset);
10513
10514                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10515                                         &dbref_key, &path, 0, 0);
10516                 if (!ret) {
10517                         found_dbackref = 1;
10518                         goto out;
10519                 }
10520
10521                 btrfs_release_path(&path);
10522
10523                 /*
10524                  * Neither inlined nor EXTENT_DATA_REF found, try
10525                  * SHARED_DATA_REF as last chance.
10526                  */
10527                 dbref_key.objectid = disk_bytenr;
10528                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10529                 dbref_key.offset = eb->start;
10530
10531                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10532                                         &dbref_key, &path, 0, 0);
10533                 if (!ret) {
10534                         found_dbackref = 1;
10535                         goto out;
10536                 }
10537         }
10538
10539 out:
10540         if (!found_dbackref)
10541                 err |= BACKREF_MISSING;
10542         btrfs_release_path(&path);
10543         if (err & BACKREF_MISSING) {
10544                 error("data extent[%llu %llu] backref lost",
10545                       disk_bytenr, disk_num_bytes);
10546         }
10547         return err;
10548 }
10549
10550 /*
10551  * Get real tree block level for the case like shared block
10552  * Return >= 0 as tree level
10553  * Return <0 for error
10554  */
10555 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10556 {
10557         struct extent_buffer *eb;
10558         struct btrfs_path path;
10559         struct btrfs_key key;
10560         struct btrfs_extent_item *ei;
10561         u64 flags;
10562         u64 transid;
10563         u8 backref_level;
10564         u8 header_level;
10565         int ret;
10566
10567         /* Search extent tree for extent generation and level */
10568         key.objectid = bytenr;
10569         key.type = BTRFS_METADATA_ITEM_KEY;
10570         key.offset = (u64)-1;
10571
10572         btrfs_init_path(&path);
10573         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10574         if (ret < 0)
10575                 goto release_out;
10576         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10577         if (ret < 0)
10578                 goto release_out;
10579         if (ret > 0) {
10580                 ret = -ENOENT;
10581                 goto release_out;
10582         }
10583
10584         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10585         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10586                             struct btrfs_extent_item);
10587         flags = btrfs_extent_flags(path.nodes[0], ei);
10588         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10589                 ret = -ENOENT;
10590                 goto release_out;
10591         }
10592
10593         /* Get transid for later read_tree_block() check */
10594         transid = btrfs_extent_generation(path.nodes[0], ei);
10595
10596         /* Get backref level as one source */
10597         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10598                 backref_level = key.offset;
10599         } else {
10600                 struct btrfs_tree_block_info *info;
10601
10602                 info = (struct btrfs_tree_block_info *)(ei + 1);
10603                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10604         }
10605         btrfs_release_path(&path);
10606
10607         /* Get level from tree block as an alternative source */
10608         eb = read_tree_block(fs_info, bytenr, transid);
10609         if (!extent_buffer_uptodate(eb)) {
10610                 free_extent_buffer(eb);
10611                 return -EIO;
10612         }
10613         header_level = btrfs_header_level(eb);
10614         free_extent_buffer(eb);
10615
10616         if (header_level != backref_level)
10617                 return -EIO;
10618         return header_level;
10619
10620 release_out:
10621         btrfs_release_path(&path);
10622         return ret;
10623 }
10624
10625 /*
10626  * Check if a tree block backref is valid (points to a valid tree block)
10627  * if level == -1, level will be resolved
10628  * Return >0 for any error found and print error message
10629  */
10630 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10631                                     u64 bytenr, int level)
10632 {
10633         struct btrfs_root *root;
10634         struct btrfs_key key;
10635         struct btrfs_path path;
10636         struct extent_buffer *eb;
10637         struct extent_buffer *node;
10638         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10639         int err = 0;
10640         int ret;
10641
10642         /* Query level for level == -1 special case */
10643         if (level == -1)
10644                 level = query_tree_block_level(fs_info, bytenr);
10645         if (level < 0) {
10646                 err |= REFERENCER_MISSING;
10647                 goto out;
10648         }
10649
10650         key.objectid = root_id;
10651         key.type = BTRFS_ROOT_ITEM_KEY;
10652         key.offset = (u64)-1;
10653
10654         root = btrfs_read_fs_root(fs_info, &key);
10655         if (IS_ERR(root)) {
10656                 err |= REFERENCER_MISSING;
10657                 goto out;
10658         }
10659
10660         /* Read out the tree block to get item/node key */
10661         eb = read_tree_block(fs_info, bytenr, 0);
10662         if (!extent_buffer_uptodate(eb)) {
10663                 err |= REFERENCER_MISSING;
10664                 free_extent_buffer(eb);
10665                 goto out;
10666         }
10667
10668         /* Empty tree, no need to check key */
10669         if (!btrfs_header_nritems(eb) && !level) {
10670                 free_extent_buffer(eb);
10671                 goto out;
10672         }
10673
10674         if (level)
10675                 btrfs_node_key_to_cpu(eb, &key, 0);
10676         else
10677                 btrfs_item_key_to_cpu(eb, &key, 0);
10678
10679         free_extent_buffer(eb);
10680
10681         btrfs_init_path(&path);
10682         path.lowest_level = level;
10683         /* Search with the first key, to ensure we can reach it */
10684         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10685         if (ret < 0) {
10686                 err |= REFERENCER_MISSING;
10687                 goto release_out;
10688         }
10689
10690         node = path.nodes[level];
10691         if (btrfs_header_bytenr(node) != bytenr) {
10692                 error(
10693         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10694                         bytenr, nodesize, bytenr,
10695                         btrfs_header_bytenr(node));
10696                 err |= REFERENCER_MISMATCH;
10697         }
10698         if (btrfs_header_level(node) != level) {
10699                 error(
10700         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10701                         bytenr, nodesize, level,
10702                         btrfs_header_level(node));
10703                 err |= REFERENCER_MISMATCH;
10704         }
10705
10706 release_out:
10707         btrfs_release_path(&path);
10708 out:
10709         if (err & REFERENCER_MISSING) {
10710                 if (level < 0)
10711                         error("extent [%llu %d] lost referencer (owner: %llu)",
10712                                 bytenr, nodesize, root_id);
10713                 else
10714                         error(
10715                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10716                                 bytenr, nodesize, root_id, level);
10717         }
10718
10719         return err;
10720 }
10721
10722 /*
10723  * Check if tree block @eb is tree reloc root.
10724  * Return 0 if it's not or any problem happens
10725  * Return 1 if it's a tree reloc root
10726  */
10727 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10728                                  struct extent_buffer *eb)
10729 {
10730         struct btrfs_root *tree_reloc_root;
10731         struct btrfs_key key;
10732         u64 bytenr = btrfs_header_bytenr(eb);
10733         u64 owner = btrfs_header_owner(eb);
10734         int ret = 0;
10735
10736         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10737         key.offset = owner;
10738         key.type = BTRFS_ROOT_ITEM_KEY;
10739
10740         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10741         if (IS_ERR(tree_reloc_root))
10742                 return 0;
10743
10744         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10745                 ret = 1;
10746         btrfs_free_fs_root(tree_reloc_root);
10747         return ret;
10748 }
10749
10750 /*
10751  * Check referencer for shared block backref
10752  * If level == -1, this function will resolve the level.
10753  */
10754 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10755                                      u64 parent, u64 bytenr, int level)
10756 {
10757         struct extent_buffer *eb;
10758         u32 nr;
10759         int found_parent = 0;
10760         int i;
10761
10762         eb = read_tree_block(fs_info, parent, 0);
10763         if (!extent_buffer_uptodate(eb))
10764                 goto out;
10765
10766         if (level == -1)
10767                 level = query_tree_block_level(fs_info, bytenr);
10768         if (level < 0)
10769                 goto out;
10770
10771         /* It's possible it's a tree reloc root */
10772         if (parent == bytenr) {
10773                 if (is_tree_reloc_root(fs_info, eb))
10774                         found_parent = 1;
10775                 goto out;
10776         }
10777
10778         if (level + 1 != btrfs_header_level(eb))
10779                 goto out;
10780
10781         nr = btrfs_header_nritems(eb);
10782         for (i = 0; i < nr; i++) {
10783                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10784                         found_parent = 1;
10785                         break;
10786                 }
10787         }
10788 out:
10789         free_extent_buffer(eb);
10790         if (!found_parent) {
10791                 error(
10792         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10793                         bytenr, fs_info->nodesize, parent, level);
10794                 return REFERENCER_MISSING;
10795         }
10796         return 0;
10797 }
10798
10799 /*
10800  * Check referencer for normal (inlined) data ref
10801  * If len == 0, it will be resolved by searching in extent tree
10802  */
10803 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10804                                      u64 root_id, u64 objectid, u64 offset,
10805                                      u64 bytenr, u64 len, u32 count)
10806 {
10807         struct btrfs_root *root;
10808         struct btrfs_root *extent_root = fs_info->extent_root;
10809         struct btrfs_key key;
10810         struct btrfs_path path;
10811         struct extent_buffer *leaf;
10812         struct btrfs_file_extent_item *fi;
10813         u32 found_count = 0;
10814         int slot;
10815         int ret = 0;
10816
10817         if (!len) {
10818                 key.objectid = bytenr;
10819                 key.type = BTRFS_EXTENT_ITEM_KEY;
10820                 key.offset = (u64)-1;
10821
10822                 btrfs_init_path(&path);
10823                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10824                 if (ret < 0)
10825                         goto out;
10826                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10827                 if (ret)
10828                         goto out;
10829                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10830                 if (key.objectid != bytenr ||
10831                     key.type != BTRFS_EXTENT_ITEM_KEY)
10832                         goto out;
10833                 len = key.offset;
10834                 btrfs_release_path(&path);
10835         }
10836         key.objectid = root_id;
10837         key.type = BTRFS_ROOT_ITEM_KEY;
10838         key.offset = (u64)-1;
10839         btrfs_init_path(&path);
10840
10841         root = btrfs_read_fs_root(fs_info, &key);
10842         if (IS_ERR(root))
10843                 goto out;
10844
10845         key.objectid = objectid;
10846         key.type = BTRFS_EXTENT_DATA_KEY;
10847         /*
10848          * It can be nasty as data backref offset is
10849          * file offset - file extent offset, which is smaller or
10850          * equal to original backref offset.  The only special case is
10851          * overflow.  So we need to special check and do further search.
10852          */
10853         key.offset = offset & (1ULL << 63) ? 0 : offset;
10854
10855         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10856         if (ret < 0)
10857                 goto out;
10858
10859         /*
10860          * Search afterwards to get correct one
10861          * NOTE: As we must do a comprehensive check on the data backref to
10862          * make sure the dref count also matches, we must iterate all file
10863          * extents for that inode.
10864          */
10865         while (1) {
10866                 leaf = path.nodes[0];
10867                 slot = path.slots[0];
10868
10869                 if (slot >= btrfs_header_nritems(leaf))
10870                         goto next;
10871                 btrfs_item_key_to_cpu(leaf, &key, slot);
10872                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10873                         break;
10874                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10875                 /*
10876                  * Except normal disk bytenr and disk num bytes, we still
10877                  * need to do extra check on dbackref offset as
10878                  * dbackref offset = file_offset - file_extent_offset
10879                  */
10880                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10881                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10882                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10883                     offset)
10884                         found_count++;
10885
10886 next:
10887                 ret = btrfs_next_item(root, &path);
10888                 if (ret)
10889                         break;
10890         }
10891 out:
10892         btrfs_release_path(&path);
10893         if (found_count != count) {
10894                 error(
10895 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10896                         bytenr, len, root_id, objectid, offset, count, found_count);
10897                 return REFERENCER_MISSING;
10898         }
10899         return 0;
10900 }
10901
10902 /*
10903  * Check if the referencer of a shared data backref exists
10904  */
10905 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10906                                      u64 parent, u64 bytenr)
10907 {
10908         struct extent_buffer *eb;
10909         struct btrfs_key key;
10910         struct btrfs_file_extent_item *fi;
10911         u32 nr;
10912         int found_parent = 0;
10913         int i;
10914
10915         eb = read_tree_block(fs_info, parent, 0);
10916         if (!extent_buffer_uptodate(eb))
10917                 goto out;
10918
10919         nr = btrfs_header_nritems(eb);
10920         for (i = 0; i < nr; i++) {
10921                 btrfs_item_key_to_cpu(eb, &key, i);
10922                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10923                         continue;
10924
10925                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10926                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10927                         continue;
10928
10929                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10930                         found_parent = 1;
10931                         break;
10932                 }
10933         }
10934
10935 out:
10936         free_extent_buffer(eb);
10937         if (!found_parent) {
10938                 error("shared extent %llu referencer lost (parent: %llu)",
10939                         bytenr, parent);
10940                 return REFERENCER_MISSING;
10941         }
10942         return 0;
10943 }
10944
10945 /*
10946  * This function will check a given extent item, including its backref and
10947  * itself (like crossing stripe boundary and type)
10948  *
10949  * Since we don't use extent_record anymore, introduce new error bit
10950  */
10951 static int check_extent_item(struct btrfs_fs_info *fs_info,
10952                              struct extent_buffer *eb, int slot)
10953 {
10954         struct btrfs_extent_item *ei;
10955         struct btrfs_extent_inline_ref *iref;
10956         struct btrfs_extent_data_ref *dref;
10957         unsigned long end;
10958         unsigned long ptr;
10959         int type;
10960         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10961         u32 item_size = btrfs_item_size_nr(eb, slot);
10962         u64 flags;
10963         u64 offset;
10964         int metadata = 0;
10965         int level;
10966         struct btrfs_key key;
10967         int ret;
10968         int err = 0;
10969
10970         btrfs_item_key_to_cpu(eb, &key, slot);
10971         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10972                 bytes_used += key.offset;
10973         else
10974                 bytes_used += nodesize;
10975
10976         if (item_size < sizeof(*ei)) {
10977                 /*
10978                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10979                  * old thing when on disk format is still un-determined.
10980                  * No need to care about it anymore
10981                  */
10982                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10983                 return -ENOTTY;
10984         }
10985
10986         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10987         flags = btrfs_extent_flags(eb, ei);
10988
10989         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10990                 metadata = 1;
10991         if (metadata && check_crossing_stripes(global_info, key.objectid,
10992                                                eb->len)) {
10993                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10994                       key.objectid, key.objectid + nodesize);
10995                 err |= CROSSING_STRIPE_BOUNDARY;
10996         }
10997
10998         ptr = (unsigned long)(ei + 1);
10999
11000         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11001                 /* Old EXTENT_ITEM metadata */
11002                 struct btrfs_tree_block_info *info;
11003
11004                 info = (struct btrfs_tree_block_info *)ptr;
11005                 level = btrfs_tree_block_level(eb, info);
11006                 ptr += sizeof(struct btrfs_tree_block_info);
11007         } else {
11008                 /* New METADATA_ITEM */
11009                 level = key.offset;
11010         }
11011         end = (unsigned long)ei + item_size;
11012
11013 next:
11014         /* Reached extent item end normally */
11015         if (ptr == end)
11016                 goto out;
11017
11018         /* Beyond extent item end, wrong item size */
11019         if (ptr > end) {
11020                 err |= ITEM_SIZE_MISMATCH;
11021                 error("extent item at bytenr %llu slot %d has wrong size",
11022                         eb->start, slot);
11023                 goto out;
11024         }
11025
11026         /* Now check every backref in this extent item */
11027         iref = (struct btrfs_extent_inline_ref *)ptr;
11028         type = btrfs_extent_inline_ref_type(eb, iref);
11029         offset = btrfs_extent_inline_ref_offset(eb, iref);
11030         switch (type) {
11031         case BTRFS_TREE_BLOCK_REF_KEY:
11032                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11033                                                level);
11034                 err |= ret;
11035                 break;
11036         case BTRFS_SHARED_BLOCK_REF_KEY:
11037                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11038                                                  level);
11039                 err |= ret;
11040                 break;
11041         case BTRFS_EXTENT_DATA_REF_KEY:
11042                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11043                 ret = check_extent_data_backref(fs_info,
11044                                 btrfs_extent_data_ref_root(eb, dref),
11045                                 btrfs_extent_data_ref_objectid(eb, dref),
11046                                 btrfs_extent_data_ref_offset(eb, dref),
11047                                 key.objectid, key.offset,
11048                                 btrfs_extent_data_ref_count(eb, dref));
11049                 err |= ret;
11050                 break;
11051         case BTRFS_SHARED_DATA_REF_KEY:
11052                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11053                 err |= ret;
11054                 break;
11055         default:
11056                 error("extent[%llu %d %llu] has unknown ref type: %d",
11057                         key.objectid, key.type, key.offset, type);
11058                 err |= UNKNOWN_TYPE;
11059                 goto out;
11060         }
11061
11062         ptr += btrfs_extent_inline_ref_size(type);
11063         goto next;
11064
11065 out:
11066         return err;
11067 }
11068
11069 /*
11070  * Check if a dev extent item is referred correctly by its chunk
11071  */
11072 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11073                                  struct extent_buffer *eb, int slot)
11074 {
11075         struct btrfs_root *chunk_root = fs_info->chunk_root;
11076         struct btrfs_dev_extent *ptr;
11077         struct btrfs_path path;
11078         struct btrfs_key chunk_key;
11079         struct btrfs_key devext_key;
11080         struct btrfs_chunk *chunk;
11081         struct extent_buffer *l;
11082         int num_stripes;
11083         u64 length;
11084         int i;
11085         int found_chunk = 0;
11086         int ret;
11087
11088         btrfs_item_key_to_cpu(eb, &devext_key, slot);
11089         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11090         length = btrfs_dev_extent_length(eb, ptr);
11091
11092         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11093         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11094         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11095
11096         btrfs_init_path(&path);
11097         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11098         if (ret)
11099                 goto out;
11100
11101         l = path.nodes[0];
11102         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11103         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11104                                       chunk_key.offset);
11105         if (ret < 0)
11106                 goto out;
11107
11108         if (btrfs_stripe_length(fs_info, l, chunk) != length)
11109                 goto out;
11110
11111         num_stripes = btrfs_chunk_num_stripes(l, chunk);
11112         for (i = 0; i < num_stripes; i++) {
11113                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11114                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11115
11116                 if (devid == devext_key.objectid &&
11117                     offset == devext_key.offset) {
11118                         found_chunk = 1;
11119                         break;
11120                 }
11121         }
11122 out:
11123         btrfs_release_path(&path);
11124         if (!found_chunk) {
11125                 error(
11126                 "device extent[%llu, %llu, %llu] did not find the related chunk",
11127                         devext_key.objectid, devext_key.offset, length);
11128                 return REFERENCER_MISSING;
11129         }
11130         return 0;
11131 }
11132
11133 /*
11134  * Check if the used space is correct with the dev item
11135  */
11136 static int check_dev_item(struct btrfs_fs_info *fs_info,
11137                           struct extent_buffer *eb, int slot)
11138 {
11139         struct btrfs_root *dev_root = fs_info->dev_root;
11140         struct btrfs_dev_item *dev_item;
11141         struct btrfs_path path;
11142         struct btrfs_key key;
11143         struct btrfs_dev_extent *ptr;
11144         u64 dev_id;
11145         u64 used;
11146         u64 total = 0;
11147         int ret;
11148
11149         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11150         dev_id = btrfs_device_id(eb, dev_item);
11151         used = btrfs_device_bytes_used(eb, dev_item);
11152
11153         key.objectid = dev_id;
11154         key.type = BTRFS_DEV_EXTENT_KEY;
11155         key.offset = 0;
11156
11157         btrfs_init_path(&path);
11158         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11159         if (ret < 0) {
11160                 btrfs_item_key_to_cpu(eb, &key, slot);
11161                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11162                         key.objectid, key.type, key.offset);
11163                 btrfs_release_path(&path);
11164                 return REFERENCER_MISSING;
11165         }
11166
11167         /* Iterate dev_extents to calculate the used space of a device */
11168         while (1) {
11169                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11170                         goto next;
11171
11172                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11173                 if (key.objectid > dev_id)
11174                         break;
11175                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11176                         goto next;
11177
11178                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11179                                      struct btrfs_dev_extent);
11180                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11181 next:
11182                 ret = btrfs_next_item(dev_root, &path);
11183                 if (ret)
11184                         break;
11185         }
11186         btrfs_release_path(&path);
11187
11188         if (used != total) {
11189                 btrfs_item_key_to_cpu(eb, &key, slot);
11190                 error(
11191 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11192                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11193                         BTRFS_DEV_EXTENT_KEY, dev_id);
11194                 return ACCOUNTING_MISMATCH;
11195         }
11196         return 0;
11197 }
11198
11199 /*
11200  * Check a block group item with its referencer (chunk) and its used space
11201  * with extent/metadata item
11202  */
11203 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11204                                   struct extent_buffer *eb, int slot)
11205 {
11206         struct btrfs_root *extent_root = fs_info->extent_root;
11207         struct btrfs_root *chunk_root = fs_info->chunk_root;
11208         struct btrfs_block_group_item *bi;
11209         struct btrfs_block_group_item bg_item;
11210         struct btrfs_path path;
11211         struct btrfs_key bg_key;
11212         struct btrfs_key chunk_key;
11213         struct btrfs_key extent_key;
11214         struct btrfs_chunk *chunk;
11215         struct extent_buffer *leaf;
11216         struct btrfs_extent_item *ei;
11217         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11218         u64 flags;
11219         u64 bg_flags;
11220         u64 used;
11221         u64 total = 0;
11222         int ret;
11223         int err = 0;
11224
11225         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11226         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11227         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11228         used = btrfs_block_group_used(&bg_item);
11229         bg_flags = btrfs_block_group_flags(&bg_item);
11230
11231         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11232         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11233         chunk_key.offset = bg_key.objectid;
11234
11235         btrfs_init_path(&path);
11236         /* Search for the referencer chunk */
11237         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11238         if (ret) {
11239                 error(
11240                 "block group[%llu %llu] did not find the related chunk item",
11241                         bg_key.objectid, bg_key.offset);
11242                 err |= REFERENCER_MISSING;
11243         } else {
11244                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11245                                         struct btrfs_chunk);
11246                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11247                                                 bg_key.offset) {
11248                         error(
11249         "block group[%llu %llu] related chunk item length does not match",
11250                                 bg_key.objectid, bg_key.offset);
11251                         err |= REFERENCER_MISMATCH;
11252                 }
11253         }
11254         btrfs_release_path(&path);
11255
11256         /* Search from the block group bytenr */
11257         extent_key.objectid = bg_key.objectid;
11258         extent_key.type = 0;
11259         extent_key.offset = 0;
11260
11261         btrfs_init_path(&path);
11262         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11263         if (ret < 0)
11264                 goto out;
11265
11266         /* Iterate extent tree to account used space */
11267         while (1) {
11268                 leaf = path.nodes[0];
11269
11270                 /* Search slot can point to the last item beyond leaf nritems */
11271                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11272                         goto next;
11273
11274                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11275                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11276                         break;
11277
11278                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11279                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11280                         goto next;
11281                 if (extent_key.objectid < bg_key.objectid)
11282                         goto next;
11283
11284                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11285                         total += nodesize;
11286                 else
11287                         total += extent_key.offset;
11288
11289                 ei = btrfs_item_ptr(leaf, path.slots[0],
11290                                     struct btrfs_extent_item);
11291                 flags = btrfs_extent_flags(leaf, ei);
11292                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11293                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11294                                 error(
11295                         "bad extent[%llu, %llu) type mismatch with chunk",
11296                                         extent_key.objectid,
11297                                         extent_key.objectid + extent_key.offset);
11298                                 err |= CHUNK_TYPE_MISMATCH;
11299                         }
11300                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11301                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11302                                     BTRFS_BLOCK_GROUP_METADATA))) {
11303                                 error(
11304                         "bad extent[%llu, %llu) type mismatch with chunk",
11305                                         extent_key.objectid,
11306                                         extent_key.objectid + nodesize);
11307                                 err |= CHUNK_TYPE_MISMATCH;
11308                         }
11309                 }
11310 next:
11311                 ret = btrfs_next_item(extent_root, &path);
11312                 if (ret)
11313                         break;
11314         }
11315
11316 out:
11317         btrfs_release_path(&path);
11318
11319         if (total != used) {
11320                 error(
11321                 "block group[%llu %llu] used %llu but extent items used %llu",
11322                         bg_key.objectid, bg_key.offset, used, total);
11323                 err |= ACCOUNTING_MISMATCH;
11324         }
11325         return err;
11326 }
11327
11328 /*
11329  * Check a chunk item.
11330  * Including checking all referred dev_extents and block group
11331  */
11332 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11333                             struct extent_buffer *eb, int slot)
11334 {
11335         struct btrfs_root *extent_root = fs_info->extent_root;
11336         struct btrfs_root *dev_root = fs_info->dev_root;
11337         struct btrfs_path path;
11338         struct btrfs_key chunk_key;
11339         struct btrfs_key bg_key;
11340         struct btrfs_key devext_key;
11341         struct btrfs_chunk *chunk;
11342         struct extent_buffer *leaf;
11343         struct btrfs_block_group_item *bi;
11344         struct btrfs_block_group_item bg_item;
11345         struct btrfs_dev_extent *ptr;
11346         u64 length;
11347         u64 chunk_end;
11348         u64 stripe_len;
11349         u64 type;
11350         int num_stripes;
11351         u64 offset;
11352         u64 objectid;
11353         int i;
11354         int ret;
11355         int err = 0;
11356
11357         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11358         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11359         length = btrfs_chunk_length(eb, chunk);
11360         chunk_end = chunk_key.offset + length;
11361         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11362                                       chunk_key.offset);
11363         if (ret < 0) {
11364                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11365                         chunk_end);
11366                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11367                 goto out;
11368         }
11369         type = btrfs_chunk_type(eb, chunk);
11370
11371         bg_key.objectid = chunk_key.offset;
11372         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11373         bg_key.offset = length;
11374
11375         btrfs_init_path(&path);
11376         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11377         if (ret) {
11378                 error(
11379                 "chunk[%llu %llu) did not find the related block group item",
11380                         chunk_key.offset, chunk_end);
11381                 err |= REFERENCER_MISSING;
11382         } else{
11383                 leaf = path.nodes[0];
11384                 bi = btrfs_item_ptr(leaf, path.slots[0],
11385                                     struct btrfs_block_group_item);
11386                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11387                                    sizeof(bg_item));
11388                 if (btrfs_block_group_flags(&bg_item) != type) {
11389                         error(
11390 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11391                                 chunk_key.offset, chunk_end, type,
11392                                 btrfs_block_group_flags(&bg_item));
11393                         err |= REFERENCER_MISSING;
11394                 }
11395         }
11396
11397         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11398         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11399         for (i = 0; i < num_stripes; i++) {
11400                 btrfs_release_path(&path);
11401                 btrfs_init_path(&path);
11402                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11403                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11404                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11405
11406                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11407                                         0, 0);
11408                 if (ret)
11409                         goto not_match_dev;
11410
11411                 leaf = path.nodes[0];
11412                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11413                                      struct btrfs_dev_extent);
11414                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11415                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11416                 if (objectid != chunk_key.objectid ||
11417                     offset != chunk_key.offset ||
11418                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11419                         goto not_match_dev;
11420                 continue;
11421 not_match_dev:
11422                 err |= BACKREF_MISSING;
11423                 error(
11424                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11425                         chunk_key.objectid, chunk_end, i);
11426                 continue;
11427         }
11428         btrfs_release_path(&path);
11429 out:
11430         return err;
11431 }
11432
11433 /*
11434  * Main entry function to check known items and update related accounting info
11435  */
11436 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11437 {
11438         struct btrfs_fs_info *fs_info = root->fs_info;
11439         struct btrfs_key key;
11440         int slot = 0;
11441         int type;
11442         struct btrfs_extent_data_ref *dref;
11443         int ret;
11444         int err = 0;
11445
11446 next:
11447         btrfs_item_key_to_cpu(eb, &key, slot);
11448         type = key.type;
11449
11450         switch (type) {
11451         case BTRFS_EXTENT_DATA_KEY:
11452                 ret = check_extent_data_item(root, eb, slot);
11453                 err |= ret;
11454                 break;
11455         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11456                 ret = check_block_group_item(fs_info, eb, slot);
11457                 err |= ret;
11458                 break;
11459         case BTRFS_DEV_ITEM_KEY:
11460                 ret = check_dev_item(fs_info, eb, slot);
11461                 err |= ret;
11462                 break;
11463         case BTRFS_CHUNK_ITEM_KEY:
11464                 ret = check_chunk_item(fs_info, eb, slot);
11465                 err |= ret;
11466                 break;
11467         case BTRFS_DEV_EXTENT_KEY:
11468                 ret = check_dev_extent_item(fs_info, eb, slot);
11469                 err |= ret;
11470                 break;
11471         case BTRFS_EXTENT_ITEM_KEY:
11472         case BTRFS_METADATA_ITEM_KEY:
11473                 ret = check_extent_item(fs_info, eb, slot);
11474                 err |= ret;
11475                 break;
11476         case BTRFS_EXTENT_CSUM_KEY:
11477                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11478                 break;
11479         case BTRFS_TREE_BLOCK_REF_KEY:
11480                 ret = check_tree_block_backref(fs_info, key.offset,
11481                                                key.objectid, -1);
11482                 err |= ret;
11483                 break;
11484         case BTRFS_EXTENT_DATA_REF_KEY:
11485                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11486                 ret = check_extent_data_backref(fs_info,
11487                                 btrfs_extent_data_ref_root(eb, dref),
11488                                 btrfs_extent_data_ref_objectid(eb, dref),
11489                                 btrfs_extent_data_ref_offset(eb, dref),
11490                                 key.objectid, 0,
11491                                 btrfs_extent_data_ref_count(eb, dref));
11492                 err |= ret;
11493                 break;
11494         case BTRFS_SHARED_BLOCK_REF_KEY:
11495                 ret = check_shared_block_backref(fs_info, key.offset,
11496                                                  key.objectid, -1);
11497                 err |= ret;
11498                 break;
11499         case BTRFS_SHARED_DATA_REF_KEY:
11500                 ret = check_shared_data_backref(fs_info, key.offset,
11501                                                 key.objectid);
11502                 err |= ret;
11503                 break;
11504         default:
11505                 break;
11506         }
11507
11508         if (++slot < btrfs_header_nritems(eb))
11509                 goto next;
11510
11511         return err;
11512 }
11513
11514 /*
11515  * Helper function for later fs/subvol tree check.  To determine if a tree
11516  * block should be checked.
11517  * This function will ensure only the direct referencer with lowest rootid to
11518  * check a fs/subvolume tree block.
11519  *
11520  * Backref check at extent tree would detect errors like missing subvolume
11521  * tree, so we can do aggressive check to reduce duplicated checks.
11522  */
11523 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11524 {
11525         struct btrfs_root *extent_root = root->fs_info->extent_root;
11526         struct btrfs_key key;
11527         struct btrfs_path path;
11528         struct extent_buffer *leaf;
11529         int slot;
11530         struct btrfs_extent_item *ei;
11531         unsigned long ptr;
11532         unsigned long end;
11533         int type;
11534         u32 item_size;
11535         u64 offset;
11536         struct btrfs_extent_inline_ref *iref;
11537         int ret;
11538
11539         btrfs_init_path(&path);
11540         key.objectid = btrfs_header_bytenr(eb);
11541         key.type = BTRFS_METADATA_ITEM_KEY;
11542         key.offset = (u64)-1;
11543
11544         /*
11545          * Any failure in backref resolving means we can't determine
11546          * whom the tree block belongs to.
11547          * So in that case, we need to check that tree block
11548          */
11549         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11550         if (ret < 0)
11551                 goto need_check;
11552
11553         ret = btrfs_previous_extent_item(extent_root, &path,
11554                                          btrfs_header_bytenr(eb));
11555         if (ret)
11556                 goto need_check;
11557
11558         leaf = path.nodes[0];
11559         slot = path.slots[0];
11560         btrfs_item_key_to_cpu(leaf, &key, slot);
11561         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11562
11563         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11564                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11565         } else {
11566                 struct btrfs_tree_block_info *info;
11567
11568                 info = (struct btrfs_tree_block_info *)(ei + 1);
11569                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11570         }
11571
11572         item_size = btrfs_item_size_nr(leaf, slot);
11573         ptr = (unsigned long)iref;
11574         end = (unsigned long)ei + item_size;
11575         while (ptr < end) {
11576                 iref = (struct btrfs_extent_inline_ref *)ptr;
11577                 type = btrfs_extent_inline_ref_type(leaf, iref);
11578                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11579
11580                 /*
11581                  * We only check the tree block if current root is
11582                  * the lowest referencer of it.
11583                  */
11584                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11585                     offset < root->objectid) {
11586                         btrfs_release_path(&path);
11587                         return 0;
11588                 }
11589
11590                 ptr += btrfs_extent_inline_ref_size(type);
11591         }
11592         /*
11593          * Normally we should also check keyed tree block ref, but that may be
11594          * very time consuming.  Inlined ref should already make us skip a lot
11595          * of refs now.  So skip search keyed tree block ref.
11596          */
11597
11598 need_check:
11599         btrfs_release_path(&path);
11600         return 1;
11601 }
11602
11603 /*
11604  * Traversal function for tree block. We will do:
11605  * 1) Skip shared fs/subvolume tree blocks
11606  * 2) Update related bytes accounting
11607  * 3) Pre-order traversal
11608  */
11609 static int traverse_tree_block(struct btrfs_root *root,
11610                                 struct extent_buffer *node)
11611 {
11612         struct extent_buffer *eb;
11613         struct btrfs_key key;
11614         struct btrfs_key drop_key;
11615         int level;
11616         u64 nr;
11617         int i;
11618         int err = 0;
11619         int ret;
11620
11621         /*
11622          * Skip shared fs/subvolume tree block, in that case they will
11623          * be checked by referencer with lowest rootid
11624          */
11625         if (is_fstree(root->objectid) && !should_check(root, node))
11626                 return 0;
11627
11628         /* Update bytes accounting */
11629         total_btree_bytes += node->len;
11630         if (fs_root_objectid(btrfs_header_owner(node)))
11631                 total_fs_tree_bytes += node->len;
11632         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11633                 total_extent_tree_bytes += node->len;
11634
11635         /* pre-order tranversal, check itself first */
11636         level = btrfs_header_level(node);
11637         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11638                                    btrfs_header_level(node),
11639                                    btrfs_header_owner(node));
11640         err |= ret;
11641         if (err)
11642                 error(
11643         "check %s failed root %llu bytenr %llu level %d, force continue check",
11644                         level ? "node":"leaf", root->objectid,
11645                         btrfs_header_bytenr(node), btrfs_header_level(node));
11646
11647         if (!level) {
11648                 btree_space_waste += btrfs_leaf_free_space(root, node);
11649                 ret = check_leaf_items(root, node);
11650                 err |= ret;
11651                 return err;
11652         }
11653
11654         nr = btrfs_header_nritems(node);
11655         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11656         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11657                 sizeof(struct btrfs_key_ptr);
11658
11659         /* Then check all its children */
11660         for (i = 0; i < nr; i++) {
11661                 u64 blocknr = btrfs_node_blockptr(node, i);
11662
11663                 btrfs_node_key_to_cpu(node, &key, i);
11664                 if (level == root->root_item.drop_level &&
11665                     is_dropped_key(&key, &drop_key))
11666                         continue;
11667
11668                 /*
11669                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11670                  * to call the function itself.
11671                  */
11672                 eb = read_tree_block(root->fs_info, blocknr, 0);
11673                 if (extent_buffer_uptodate(eb)) {
11674                         ret = traverse_tree_block(root, eb);
11675                         err |= ret;
11676                 }
11677                 free_extent_buffer(eb);
11678         }
11679
11680         return err;
11681 }
11682
11683 /*
11684  * Low memory usage version check_chunks_and_extents.
11685  */
11686 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11687 {
11688         struct btrfs_path path;
11689         struct btrfs_key key;
11690         struct btrfs_root *root1;
11691         struct btrfs_root *root;
11692         struct btrfs_root *cur_root;
11693         int err = 0;
11694         int ret;
11695
11696         root = fs_info->fs_root;
11697
11698         root1 = root->fs_info->chunk_root;
11699         ret = traverse_tree_block(root1, root1->node);
11700         err |= ret;
11701
11702         root1 = root->fs_info->tree_root;
11703         ret = traverse_tree_block(root1, root1->node);
11704         err |= ret;
11705
11706         btrfs_init_path(&path);
11707         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11708         key.offset = 0;
11709         key.type = BTRFS_ROOT_ITEM_KEY;
11710
11711         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11712         if (ret) {
11713                 error("cannot find extent treet in tree_root");
11714                 goto out;
11715         }
11716
11717         while (1) {
11718                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11719                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11720                         goto next;
11721                 key.offset = (u64)-1;
11722
11723                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11724                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11725                                         &key);
11726                 else
11727                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11728                 if (IS_ERR(cur_root) || !cur_root) {
11729                         error("failed to read tree: %lld", key.objectid);
11730                         goto next;
11731                 }
11732
11733                 ret = traverse_tree_block(cur_root, cur_root->node);
11734                 err |= ret;
11735
11736                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11737                         btrfs_free_fs_root(cur_root);
11738 next:
11739                 ret = btrfs_next_item(root1, &path);
11740                 if (ret)
11741                         goto out;
11742         }
11743
11744 out:
11745         btrfs_release_path(&path);
11746         return err;
11747 }
11748
11749 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11750 {
11751         int ret;
11752
11753         if (!ctx.progress_enabled)
11754                 fprintf(stderr, "checking extents\n");
11755         if (check_mode == CHECK_MODE_LOWMEM)
11756                 ret = check_chunks_and_extents_v2(fs_info);
11757         else
11758                 ret = check_chunks_and_extents(fs_info);
11759
11760         return ret;
11761 }
11762
11763 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11764                            struct btrfs_root *root, int overwrite)
11765 {
11766         struct extent_buffer *c;
11767         struct extent_buffer *old = root->node;
11768         int level;
11769         int ret;
11770         struct btrfs_disk_key disk_key = {0,0,0};
11771
11772         level = 0;
11773
11774         if (overwrite) {
11775                 c = old;
11776                 extent_buffer_get(c);
11777                 goto init;
11778         }
11779         c = btrfs_alloc_free_block(trans, root,
11780                                    root->fs_info->nodesize,
11781                                    root->root_key.objectid,
11782                                    &disk_key, level, 0, 0);
11783         if (IS_ERR(c)) {
11784                 c = old;
11785                 extent_buffer_get(c);
11786                 overwrite = 1;
11787         }
11788 init:
11789         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11790         btrfs_set_header_level(c, level);
11791         btrfs_set_header_bytenr(c, c->start);
11792         btrfs_set_header_generation(c, trans->transid);
11793         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11794         btrfs_set_header_owner(c, root->root_key.objectid);
11795
11796         write_extent_buffer(c, root->fs_info->fsid,
11797                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11798
11799         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11800                             btrfs_header_chunk_tree_uuid(c),
11801                             BTRFS_UUID_SIZE);
11802
11803         btrfs_mark_buffer_dirty(c);
11804         /*
11805          * this case can happen in the following case:
11806          *
11807          * 1.overwrite previous root.
11808          *
11809          * 2.reinit reloc data root, this is because we skip pin
11810          * down reloc data tree before which means we can allocate
11811          * same block bytenr here.
11812          */
11813         if (old->start == c->start) {
11814                 btrfs_set_root_generation(&root->root_item,
11815                                           trans->transid);
11816                 root->root_item.level = btrfs_header_level(root->node);
11817                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11818                                         &root->root_key, &root->root_item);
11819                 if (ret) {
11820                         free_extent_buffer(c);
11821                         return ret;
11822                 }
11823         }
11824         free_extent_buffer(old);
11825         root->node = c;
11826         add_root_to_dirty_list(root);
11827         return 0;
11828 }
11829
11830 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11831                                 struct extent_buffer *eb, int tree_root)
11832 {
11833         struct extent_buffer *tmp;
11834         struct btrfs_root_item *ri;
11835         struct btrfs_key key;
11836         u64 bytenr;
11837         int level = btrfs_header_level(eb);
11838         int nritems;
11839         int ret;
11840         int i;
11841
11842         /*
11843          * If we have pinned this block before, don't pin it again.
11844          * This can not only avoid forever loop with broken filesystem
11845          * but also give us some speedups.
11846          */
11847         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11848                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11849                 return 0;
11850
11851         btrfs_pin_extent(fs_info, eb->start, eb->len);
11852
11853         nritems = btrfs_header_nritems(eb);
11854         for (i = 0; i < nritems; i++) {
11855                 if (level == 0) {
11856                         btrfs_item_key_to_cpu(eb, &key, i);
11857                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11858                                 continue;
11859                         /* Skip the extent root and reloc roots */
11860                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11861                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11862                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11863                                 continue;
11864                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11865                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11866
11867                         /*
11868                          * If at any point we start needing the real root we
11869                          * will have to build a stump root for the root we are
11870                          * in, but for now this doesn't actually use the root so
11871                          * just pass in extent_root.
11872                          */
11873                         tmp = read_tree_block(fs_info, bytenr, 0);
11874                         if (!extent_buffer_uptodate(tmp)) {
11875                                 fprintf(stderr, "Error reading root block\n");
11876                                 return -EIO;
11877                         }
11878                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11879                         free_extent_buffer(tmp);
11880                         if (ret)
11881                                 return ret;
11882                 } else {
11883                         bytenr = btrfs_node_blockptr(eb, i);
11884
11885                         /* If we aren't the tree root don't read the block */
11886                         if (level == 1 && !tree_root) {
11887                                 btrfs_pin_extent(fs_info, bytenr,
11888                                                 fs_info->nodesize);
11889                                 continue;
11890                         }
11891
11892                         tmp = read_tree_block(fs_info, bytenr, 0);
11893                         if (!extent_buffer_uptodate(tmp)) {
11894                                 fprintf(stderr, "Error reading tree block\n");
11895                                 return -EIO;
11896                         }
11897                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11898                         free_extent_buffer(tmp);
11899                         if (ret)
11900                                 return ret;
11901                 }
11902         }
11903
11904         return 0;
11905 }
11906
11907 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11908 {
11909         int ret;
11910
11911         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11912         if (ret)
11913                 return ret;
11914
11915         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11916 }
11917
11918 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11919 {
11920         struct btrfs_block_group_cache *cache;
11921         struct btrfs_path path;
11922         struct extent_buffer *leaf;
11923         struct btrfs_chunk *chunk;
11924         struct btrfs_key key;
11925         int ret;
11926         u64 start;
11927
11928         btrfs_init_path(&path);
11929         key.objectid = 0;
11930         key.type = BTRFS_CHUNK_ITEM_KEY;
11931         key.offset = 0;
11932         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11933         if (ret < 0) {
11934                 btrfs_release_path(&path);
11935                 return ret;
11936         }
11937
11938         /*
11939          * We do this in case the block groups were screwed up and had alloc
11940          * bits that aren't actually set on the chunks.  This happens with
11941          * restored images every time and could happen in real life I guess.
11942          */
11943         fs_info->avail_data_alloc_bits = 0;
11944         fs_info->avail_metadata_alloc_bits = 0;
11945         fs_info->avail_system_alloc_bits = 0;
11946
11947         /* First we need to create the in-memory block groups */
11948         while (1) {
11949                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11950                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11951                         if (ret < 0) {
11952                                 btrfs_release_path(&path);
11953                                 return ret;
11954                         }
11955                         if (ret) {
11956                                 ret = 0;
11957                                 break;
11958                         }
11959                 }
11960                 leaf = path.nodes[0];
11961                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11962                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11963                         path.slots[0]++;
11964                         continue;
11965                 }
11966
11967                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11968                 btrfs_add_block_group(fs_info, 0,
11969                                       btrfs_chunk_type(leaf, chunk),
11970                                       key.objectid, key.offset,
11971                                       btrfs_chunk_length(leaf, chunk));
11972                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11973                                  key.offset + btrfs_chunk_length(leaf, chunk));
11974                 path.slots[0]++;
11975         }
11976         start = 0;
11977         while (1) {
11978                 cache = btrfs_lookup_first_block_group(fs_info, start);
11979                 if (!cache)
11980                         break;
11981                 cache->cached = 1;
11982                 start = cache->key.objectid + cache->key.offset;
11983         }
11984
11985         btrfs_release_path(&path);
11986         return 0;
11987 }
11988
11989 static int reset_balance(struct btrfs_trans_handle *trans,
11990                          struct btrfs_fs_info *fs_info)
11991 {
11992         struct btrfs_root *root = fs_info->tree_root;
11993         struct btrfs_path path;
11994         struct extent_buffer *leaf;
11995         struct btrfs_key key;
11996         int del_slot, del_nr = 0;
11997         int ret;
11998         int found = 0;
11999
12000         btrfs_init_path(&path);
12001         key.objectid = BTRFS_BALANCE_OBJECTID;
12002         key.type = BTRFS_BALANCE_ITEM_KEY;
12003         key.offset = 0;
12004         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12005         if (ret) {
12006                 if (ret > 0)
12007                         ret = 0;
12008                 if (!ret)
12009                         goto reinit_data_reloc;
12010                 else
12011                         goto out;
12012         }
12013
12014         ret = btrfs_del_item(trans, root, &path);
12015         if (ret)
12016                 goto out;
12017         btrfs_release_path(&path);
12018
12019         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12020         key.type = BTRFS_ROOT_ITEM_KEY;
12021         key.offset = 0;
12022         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12023         if (ret < 0)
12024                 goto out;
12025         while (1) {
12026                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12027                         if (!found)
12028                                 break;
12029
12030                         if (del_nr) {
12031                                 ret = btrfs_del_items(trans, root, &path,
12032                                                       del_slot, del_nr);
12033                                 del_nr = 0;
12034                                 if (ret)
12035                                         goto out;
12036                         }
12037                         key.offset++;
12038                         btrfs_release_path(&path);
12039
12040                         found = 0;
12041                         ret = btrfs_search_slot(trans, root, &key, &path,
12042                                                 -1, 1);
12043                         if (ret < 0)
12044                                 goto out;
12045                         continue;
12046                 }
12047                 found = 1;
12048                 leaf = path.nodes[0];
12049                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12050                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12051                         break;
12052                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12053                         path.slots[0]++;
12054                         continue;
12055                 }
12056                 if (!del_nr) {
12057                         del_slot = path.slots[0];
12058                         del_nr = 1;
12059                 } else {
12060                         del_nr++;
12061                 }
12062                 path.slots[0]++;
12063         }
12064
12065         if (del_nr) {
12066                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12067                 if (ret)
12068                         goto out;
12069         }
12070         btrfs_release_path(&path);
12071
12072 reinit_data_reloc:
12073         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12074         key.type = BTRFS_ROOT_ITEM_KEY;
12075         key.offset = (u64)-1;
12076         root = btrfs_read_fs_root(fs_info, &key);
12077         if (IS_ERR(root)) {
12078                 fprintf(stderr, "Error reading data reloc tree\n");
12079                 ret = PTR_ERR(root);
12080                 goto out;
12081         }
12082         record_root_in_trans(trans, root);
12083         ret = btrfs_fsck_reinit_root(trans, root, 0);
12084         if (ret)
12085                 goto out;
12086         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12087 out:
12088         btrfs_release_path(&path);
12089         return ret;
12090 }
12091
12092 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12093                               struct btrfs_fs_info *fs_info)
12094 {
12095         u64 start = 0;
12096         int ret;
12097
12098         /*
12099          * The only reason we don't do this is because right now we're just
12100          * walking the trees we find and pinning down their bytes, we don't look
12101          * at any of the leaves.  In order to do mixed groups we'd have to check
12102          * the leaves of any fs roots and pin down the bytes for any file
12103          * extents we find.  Not hard but why do it if we don't have to?
12104          */
12105         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12106                 fprintf(stderr, "We don't support re-initing the extent tree "
12107                         "for mixed block groups yet, please notify a btrfs "
12108                         "developer you want to do this so they can add this "
12109                         "functionality.\n");
12110                 return -EINVAL;
12111         }
12112
12113         /*
12114          * first we need to walk all of the trees except the extent tree and pin
12115          * down the bytes that are in use so we don't overwrite any existing
12116          * metadata.
12117          */
12118         ret = pin_metadata_blocks(fs_info);
12119         if (ret) {
12120                 fprintf(stderr, "error pinning down used bytes\n");
12121                 return ret;
12122         }
12123
12124         /*
12125          * Need to drop all the block groups since we're going to recreate all
12126          * of them again.
12127          */
12128         btrfs_free_block_groups(fs_info);
12129         ret = reset_block_groups(fs_info);
12130         if (ret) {
12131                 fprintf(stderr, "error resetting the block groups\n");
12132                 return ret;
12133         }
12134
12135         /* Ok we can allocate now, reinit the extent root */
12136         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12137         if (ret) {
12138                 fprintf(stderr, "extent root initialization failed\n");
12139                 /*
12140                  * When the transaction code is updated we should end the
12141                  * transaction, but for now progs only knows about commit so
12142                  * just return an error.
12143                  */
12144                 return ret;
12145         }
12146
12147         /*
12148          * Now we have all the in-memory block groups setup so we can make
12149          * allocations properly, and the metadata we care about is safe since we
12150          * pinned all of it above.
12151          */
12152         while (1) {
12153                 struct btrfs_block_group_cache *cache;
12154
12155                 cache = btrfs_lookup_first_block_group(fs_info, start);
12156                 if (!cache)
12157                         break;
12158                 start = cache->key.objectid + cache->key.offset;
12159                 ret = btrfs_insert_item(trans, fs_info->extent_root,
12160                                         &cache->key, &cache->item,
12161                                         sizeof(cache->item));
12162                 if (ret) {
12163                         fprintf(stderr, "Error adding block group\n");
12164                         return ret;
12165                 }
12166                 btrfs_extent_post_op(trans, fs_info->extent_root);
12167         }
12168
12169         ret = reset_balance(trans, fs_info);
12170         if (ret)
12171                 fprintf(stderr, "error resetting the pending balance\n");
12172
12173         return ret;
12174 }
12175
12176 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12177 {
12178         struct btrfs_path path;
12179         struct btrfs_trans_handle *trans;
12180         struct btrfs_key key;
12181         int ret;
12182
12183         printf("Recowing metadata block %llu\n", eb->start);
12184         key.objectid = btrfs_header_owner(eb);
12185         key.type = BTRFS_ROOT_ITEM_KEY;
12186         key.offset = (u64)-1;
12187
12188         root = btrfs_read_fs_root(root->fs_info, &key);
12189         if (IS_ERR(root)) {
12190                 fprintf(stderr, "Couldn't find owner root %llu\n",
12191                         key.objectid);
12192                 return PTR_ERR(root);
12193         }
12194
12195         trans = btrfs_start_transaction(root, 1);
12196         if (IS_ERR(trans))
12197                 return PTR_ERR(trans);
12198
12199         btrfs_init_path(&path);
12200         path.lowest_level = btrfs_header_level(eb);
12201         if (path.lowest_level)
12202                 btrfs_node_key_to_cpu(eb, &key, 0);
12203         else
12204                 btrfs_item_key_to_cpu(eb, &key, 0);
12205
12206         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12207         btrfs_commit_transaction(trans, root);
12208         btrfs_release_path(&path);
12209         return ret;
12210 }
12211
12212 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12213 {
12214         struct btrfs_path path;
12215         struct btrfs_trans_handle *trans;
12216         struct btrfs_key key;
12217         int ret;
12218
12219         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12220                bad->key.type, bad->key.offset);
12221         key.objectid = bad->root_id;
12222         key.type = BTRFS_ROOT_ITEM_KEY;
12223         key.offset = (u64)-1;
12224
12225         root = btrfs_read_fs_root(root->fs_info, &key);
12226         if (IS_ERR(root)) {
12227                 fprintf(stderr, "Couldn't find owner root %llu\n",
12228                         key.objectid);
12229                 return PTR_ERR(root);
12230         }
12231
12232         trans = btrfs_start_transaction(root, 1);
12233         if (IS_ERR(trans))
12234                 return PTR_ERR(trans);
12235
12236         btrfs_init_path(&path);
12237         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12238         if (ret) {
12239                 if (ret > 0)
12240                         ret = 0;
12241                 goto out;
12242         }
12243         ret = btrfs_del_item(trans, root, &path);
12244 out:
12245         btrfs_commit_transaction(trans, root);
12246         btrfs_release_path(&path);
12247         return ret;
12248 }
12249
12250 static int zero_log_tree(struct btrfs_root *root)
12251 {
12252         struct btrfs_trans_handle *trans;
12253         int ret;
12254
12255         trans = btrfs_start_transaction(root, 1);
12256         if (IS_ERR(trans)) {
12257                 ret = PTR_ERR(trans);
12258                 return ret;
12259         }
12260         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12261         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12262         ret = btrfs_commit_transaction(trans, root);
12263         return ret;
12264 }
12265
12266 static int populate_csum(struct btrfs_trans_handle *trans,
12267                          struct btrfs_root *csum_root, char *buf, u64 start,
12268                          u64 len)
12269 {
12270         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12271         u64 offset = 0;
12272         u64 sectorsize;
12273         int ret = 0;
12274
12275         while (offset < len) {
12276                 sectorsize = fs_info->sectorsize;
12277                 ret = read_extent_data(fs_info, buf, start + offset,
12278                                        &sectorsize, 0);
12279                 if (ret)
12280                         break;
12281                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12282                                             start + offset, buf, sectorsize);
12283                 if (ret)
12284                         break;
12285                 offset += sectorsize;
12286         }
12287         return ret;
12288 }
12289
12290 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12291                                       struct btrfs_root *csum_root,
12292                                       struct btrfs_root *cur_root)
12293 {
12294         struct btrfs_path path;
12295         struct btrfs_key key;
12296         struct extent_buffer *node;
12297         struct btrfs_file_extent_item *fi;
12298         char *buf = NULL;
12299         u64 start = 0;
12300         u64 len = 0;
12301         int slot = 0;
12302         int ret = 0;
12303
12304         buf = malloc(cur_root->fs_info->sectorsize);
12305         if (!buf)
12306                 return -ENOMEM;
12307
12308         btrfs_init_path(&path);
12309         key.objectid = 0;
12310         key.offset = 0;
12311         key.type = 0;
12312         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12313         if (ret < 0)
12314                 goto out;
12315         /* Iterate all regular file extents and fill its csum */
12316         while (1) {
12317                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12318
12319                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12320                         goto next;
12321                 node = path.nodes[0];
12322                 slot = path.slots[0];
12323                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12324                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12325                         goto next;
12326                 start = btrfs_file_extent_disk_bytenr(node, fi);
12327                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12328
12329                 ret = populate_csum(trans, csum_root, buf, start, len);
12330                 if (ret == -EEXIST)
12331                         ret = 0;
12332                 if (ret < 0)
12333                         goto out;
12334 next:
12335                 /*
12336                  * TODO: if next leaf is corrupted, jump to nearest next valid
12337                  * leaf.
12338                  */
12339                 ret = btrfs_next_item(cur_root, &path);
12340                 if (ret < 0)
12341                         goto out;
12342                 if (ret > 0) {
12343                         ret = 0;
12344                         goto out;
12345                 }
12346         }
12347
12348 out:
12349         btrfs_release_path(&path);
12350         free(buf);
12351         return ret;
12352 }
12353
12354 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12355                                   struct btrfs_root *csum_root)
12356 {
12357         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12358         struct btrfs_path path;
12359         struct btrfs_root *tree_root = fs_info->tree_root;
12360         struct btrfs_root *cur_root;
12361         struct extent_buffer *node;
12362         struct btrfs_key key;
12363         int slot = 0;
12364         int ret = 0;
12365
12366         btrfs_init_path(&path);
12367         key.objectid = BTRFS_FS_TREE_OBJECTID;
12368         key.offset = 0;
12369         key.type = BTRFS_ROOT_ITEM_KEY;
12370         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12371         if (ret < 0)
12372                 goto out;
12373         if (ret > 0) {
12374                 ret = -ENOENT;
12375                 goto out;
12376         }
12377
12378         while (1) {
12379                 node = path.nodes[0];
12380                 slot = path.slots[0];
12381                 btrfs_item_key_to_cpu(node, &key, slot);
12382                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12383                         goto out;
12384                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12385                         goto next;
12386                 if (!is_fstree(key.objectid))
12387                         goto next;
12388                 key.offset = (u64)-1;
12389
12390                 cur_root = btrfs_read_fs_root(fs_info, &key);
12391                 if (IS_ERR(cur_root) || !cur_root) {
12392                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12393                                 key.objectid);
12394                         goto out;
12395                 }
12396                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12397                                 cur_root);
12398                 if (ret < 0)
12399                         goto out;
12400 next:
12401                 ret = btrfs_next_item(tree_root, &path);
12402                 if (ret > 0) {
12403                         ret = 0;
12404                         goto out;
12405                 }
12406                 if (ret < 0)
12407                         goto out;
12408         }
12409
12410 out:
12411         btrfs_release_path(&path);
12412         return ret;
12413 }
12414
12415 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12416                                       struct btrfs_root *csum_root)
12417 {
12418         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12419         struct btrfs_path path;
12420         struct btrfs_extent_item *ei;
12421         struct extent_buffer *leaf;
12422         char *buf;
12423         struct btrfs_key key;
12424         int ret;
12425
12426         btrfs_init_path(&path);
12427         key.objectid = 0;
12428         key.type = BTRFS_EXTENT_ITEM_KEY;
12429         key.offset = 0;
12430         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12431         if (ret < 0) {
12432                 btrfs_release_path(&path);
12433                 return ret;
12434         }
12435
12436         buf = malloc(csum_root->fs_info->sectorsize);
12437         if (!buf) {
12438                 btrfs_release_path(&path);
12439                 return -ENOMEM;
12440         }
12441
12442         while (1) {
12443                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12444                         ret = btrfs_next_leaf(extent_root, &path);
12445                         if (ret < 0)
12446                                 break;
12447                         if (ret) {
12448                                 ret = 0;
12449                                 break;
12450                         }
12451                 }
12452                 leaf = path.nodes[0];
12453
12454                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12455                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12456                         path.slots[0]++;
12457                         continue;
12458                 }
12459
12460                 ei = btrfs_item_ptr(leaf, path.slots[0],
12461                                     struct btrfs_extent_item);
12462                 if (!(btrfs_extent_flags(leaf, ei) &
12463                       BTRFS_EXTENT_FLAG_DATA)) {
12464                         path.slots[0]++;
12465                         continue;
12466                 }
12467
12468                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12469                                     key.offset);
12470                 if (ret)
12471                         break;
12472                 path.slots[0]++;
12473         }
12474
12475         btrfs_release_path(&path);
12476         free(buf);
12477         return ret;
12478 }
12479
/*
 * Recompute the checksums and store them in the csum tree.
 *
 * An extent tree init wipes out all extent info, so in that case the extent
 * tree cannot be relied upon and the fs trees must be used instead.  A
 * non-zero @search_fs_tree selects the fs/subvolume trees as the source;
 * zero selects the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
12496
12497 static void free_roots_info_cache(void)
12498 {
12499         if (!roots_info_cache)
12500                 return;
12501
12502         while (!cache_tree_empty(roots_info_cache)) {
12503                 struct cache_extent *entry;
12504                 struct root_item_info *rii;
12505
12506                 entry = first_cache_extent(roots_info_cache);
12507                 if (!entry)
12508                         break;
12509                 remove_cache_extent(roots_info_cache, entry);
12510                 rii = container_of(entry, struct root_item_info, cache_extent);
12511                 free(rii);
12512         }
12513
12514         free(roots_info_cache);
12515         roots_info_cache = NULL;
12516 }
12517
12518 static int build_roots_info_cache(struct btrfs_fs_info *info)
12519 {
12520         int ret = 0;
12521         struct btrfs_key key;
12522         struct extent_buffer *leaf;
12523         struct btrfs_path path;
12524
12525         if (!roots_info_cache) {
12526                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12527                 if (!roots_info_cache)
12528                         return -ENOMEM;
12529                 cache_tree_init(roots_info_cache);
12530         }
12531
12532         btrfs_init_path(&path);
12533         key.objectid = 0;
12534         key.type = BTRFS_EXTENT_ITEM_KEY;
12535         key.offset = 0;
12536         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12537         if (ret < 0)
12538                 goto out;
12539         leaf = path.nodes[0];
12540
12541         while (1) {
12542                 struct btrfs_key found_key;
12543                 struct btrfs_extent_item *ei;
12544                 struct btrfs_extent_inline_ref *iref;
12545                 int slot = path.slots[0];
12546                 int type;
12547                 u64 flags;
12548                 u64 root_id;
12549                 u8 level;
12550                 struct cache_extent *entry;
12551                 struct root_item_info *rii;
12552
12553                 if (slot >= btrfs_header_nritems(leaf)) {
12554                         ret = btrfs_next_leaf(info->extent_root, &path);
12555                         if (ret < 0) {
12556                                 break;
12557                         } else if (ret) {
12558                                 ret = 0;
12559                                 break;
12560                         }
12561                         leaf = path.nodes[0];
12562                         slot = path.slots[0];
12563                 }
12564
12565                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12566
12567                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12568                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12569                         goto next;
12570
12571                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12572                 flags = btrfs_extent_flags(leaf, ei);
12573
12574                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12575                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12576                         goto next;
12577
12578                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12579                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12580                         level = found_key.offset;
12581                 } else {
12582                         struct btrfs_tree_block_info *binfo;
12583
12584                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12585                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12586                         level = btrfs_tree_block_level(leaf, binfo);
12587                 }
12588
12589                 /*
12590                  * For a root extent, it must be of the following type and the
12591                  * first (and only one) iref in the item.
12592                  */
12593                 type = btrfs_extent_inline_ref_type(leaf, iref);
12594                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12595                         goto next;
12596
12597                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12598                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12599                 if (!entry) {
12600                         rii = malloc(sizeof(struct root_item_info));
12601                         if (!rii) {
12602                                 ret = -ENOMEM;
12603                                 goto out;
12604                         }
12605                         rii->cache_extent.start = root_id;
12606                         rii->cache_extent.size = 1;
12607                         rii->level = (u8)-1;
12608                         entry = &rii->cache_extent;
12609                         ret = insert_cache_extent(roots_info_cache, entry);
12610                         ASSERT(ret == 0);
12611                 } else {
12612                         rii = container_of(entry, struct root_item_info,
12613                                            cache_extent);
12614                 }
12615
12616                 ASSERT(rii->cache_extent.start == root_id);
12617                 ASSERT(rii->cache_extent.size == 1);
12618
12619                 if (level > rii->level || rii->level == (u8)-1) {
12620                         rii->level = level;
12621                         rii->bytenr = found_key.objectid;
12622                         rii->gen = btrfs_extent_generation(leaf, ei);
12623                         rii->node_count = 1;
12624                 } else if (level == rii->level) {
12625                         rii->node_count++;
12626                 }
12627 next:
12628                 path.slots[0]++;
12629         }
12630
12631 out:
12632         btrfs_release_path(&path);
12633
12634         return ret;
12635 }
12636
12637 static int maybe_repair_root_item(struct btrfs_path *path,
12638                                   const struct btrfs_key *root_key,
12639                                   const int read_only_mode)
12640 {
12641         const u64 root_id = root_key->objectid;
12642         struct cache_extent *entry;
12643         struct root_item_info *rii;
12644         struct btrfs_root_item ri;
12645         unsigned long offset;
12646
12647         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12648         if (!entry) {
12649                 fprintf(stderr,
12650                         "Error: could not find extent items for root %llu\n",
12651                         root_key->objectid);
12652                 return -ENOENT;
12653         }
12654
12655         rii = container_of(entry, struct root_item_info, cache_extent);
12656         ASSERT(rii->cache_extent.start == root_id);
12657         ASSERT(rii->cache_extent.size == 1);
12658
12659         if (rii->node_count != 1) {
12660                 fprintf(stderr,
12661                         "Error: could not find btree root extent for root %llu\n",
12662                         root_id);
12663                 return -ENOENT;
12664         }
12665
12666         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12667         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12668
12669         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12670             btrfs_root_level(&ri) != rii->level ||
12671             btrfs_root_generation(&ri) != rii->gen) {
12672
12673                 /*
12674                  * If we're in repair mode but our caller told us to not update
12675                  * the root item, i.e. just check if it needs to be updated, don't
12676                  * print this message, since the caller will call us again shortly
12677                  * for the same root item without read only mode (the caller will
12678                  * open a transaction first).
12679                  */
12680                 if (!(read_only_mode && repair))
12681                         fprintf(stderr,
12682                                 "%sroot item for root %llu,"
12683                                 " current bytenr %llu, current gen %llu, current level %u,"
12684                                 " new bytenr %llu, new gen %llu, new level %u\n",
12685                                 (read_only_mode ? "" : "fixing "),
12686                                 root_id,
12687                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12688                                 btrfs_root_level(&ri),
12689                                 rii->bytenr, rii->gen, rii->level);
12690
12691                 if (btrfs_root_generation(&ri) > rii->gen) {
12692                         fprintf(stderr,
12693                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12694                                 root_id, btrfs_root_generation(&ri), rii->gen);
12695                         return -EINVAL;
12696                 }
12697
12698                 if (!read_only_mode) {
12699                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12700                         btrfs_set_root_level(&ri, rii->level);
12701                         btrfs_set_root_generation(&ri, rii->gen);
12702                         write_extent_buffer(path->nodes[0], &ri,
12703                                             offset, sizeof(ri));
12704                 }
12705
12706                 return 1;
12707         }
12708
12709         return 0;
12710 }
12711
12712 /*
12713  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12714  * caused read-only snapshots to be corrupted if they were created at a moment
12715  * when the source subvolume/snapshot had orphan items. The issue was that the
12716  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12717  * node instead of the post orphan cleanup root node.
12718  * So this function, and its callees, just detects and fixes those cases. Even
12719  * though the regression was for read-only snapshots, this function applies to
12720  * any snapshot/subvolume root.
12721  * This must be run before any other repair code - not doing it so, makes other
12722  * repair code delete or modify backrefs in the extent tree for example, which
12723  * will result in an inconsistent fs after repairing the root items.
12724  */
12725 static int repair_root_items(struct btrfs_fs_info *info)
12726 {
12727         struct btrfs_path path;
12728         struct btrfs_key key;
12729         struct extent_buffer *leaf;
12730         struct btrfs_trans_handle *trans = NULL;
12731         int ret = 0;
12732         int bad_roots = 0;
12733         int need_trans = 0;
12734
12735         btrfs_init_path(&path);
12736
12737         ret = build_roots_info_cache(info);
12738         if (ret)
12739                 goto out;
12740
12741         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12742         key.type = BTRFS_ROOT_ITEM_KEY;
12743         key.offset = 0;
12744
12745 again:
12746         /*
12747          * Avoid opening and committing transactions if a leaf doesn't have
12748          * any root items that need to be fixed, so that we avoid rotating
12749          * backup roots unnecessarily.
12750          */
12751         if (need_trans) {
12752                 trans = btrfs_start_transaction(info->tree_root, 1);
12753                 if (IS_ERR(trans)) {
12754                         ret = PTR_ERR(trans);
12755                         goto out;
12756                 }
12757         }
12758
12759         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12760                                 0, trans ? 1 : 0);
12761         if (ret < 0)
12762                 goto out;
12763         leaf = path.nodes[0];
12764
12765         while (1) {
12766                 struct btrfs_key found_key;
12767
12768                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12769                         int no_more_keys = find_next_key(&path, &key);
12770
12771                         btrfs_release_path(&path);
12772                         if (trans) {
12773                                 ret = btrfs_commit_transaction(trans,
12774                                                                info->tree_root);
12775                                 trans = NULL;
12776                                 if (ret < 0)
12777                                         goto out;
12778                         }
12779                         need_trans = 0;
12780                         if (no_more_keys)
12781                                 break;
12782                         goto again;
12783                 }
12784
12785                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12786
12787                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12788                         goto next;
12789                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12790                         goto next;
12791
12792                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12793                 if (ret < 0)
12794                         goto out;
12795                 if (ret) {
12796                         if (!trans && repair) {
12797                                 need_trans = 1;
12798                                 key = found_key;
12799                                 btrfs_release_path(&path);
12800                                 goto again;
12801                         }
12802                         bad_roots++;
12803                 }
12804 next:
12805                 path.slots[0]++;
12806         }
12807         ret = 0;
12808 out:
12809         free_roots_info_cache();
12810         btrfs_release_path(&path);
12811         if (trans)
12812                 btrfs_commit_transaction(trans, info->tree_root);
12813         if (ret < 0)
12814                 return ret;
12815
12816         return bad_roots;
12817 }
12818
12819 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12820 {
12821         struct btrfs_trans_handle *trans;
12822         struct btrfs_block_group_cache *bg_cache;
12823         u64 current = 0;
12824         int ret = 0;
12825
12826         /* Clear all free space cache inodes and its extent data */
12827         while (1) {
12828                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12829                 if (!bg_cache)
12830                         break;
12831                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12832                 if (ret < 0)
12833                         return ret;
12834                 current = bg_cache->key.objectid + bg_cache->key.offset;
12835         }
12836
12837         /* Don't forget to set cache_generation to -1 */
12838         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12839         if (IS_ERR(trans)) {
12840                 error("failed to update super block cache generation");
12841                 return PTR_ERR(trans);
12842         }
12843         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12844         btrfs_commit_transaction(trans, fs_info->tree_root);
12845
12846         return ret;
12847 }
12848
12849 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
12850                 int clear_version)
12851 {
12852         int ret = 0;
12853
12854         if (clear_version == 1) {
12855                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12856                         error(
12857                 "free space cache v2 detected, use --clear-space-cache v2");
12858                         ret = 1;
12859                         goto close_out;
12860                 }
12861                 printf("Clearing free space cache\n");
12862                 ret = clear_free_space_cache(fs_info);
12863                 if (ret) {
12864                         error("failed to clear free space cache");
12865                         ret = 1;
12866                 } else {
12867                         printf("Free space cache cleared\n");
12868                 }
12869         } else if (clear_version == 2) {
12870                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12871                         printf("no free space cache v2 to clear\n");
12872                         ret = 0;
12873                         goto close_out;
12874                 }
12875                 printf("Clear free space cache v2\n");
12876                 ret = btrfs_clear_free_space_tree(fs_info);
12877                 if (ret) {
12878                         error("failed to clear free space cache v2: %d", ret);
12879                         ret = 1;
12880                 } else {
12881                         printf("free space cache v2 cleared\n");
12882                 }
12883         }
12884 close_out:
12885         return ret;
12886 }
12887
/*
 * Help text of "btrfs check", handed to usage() by cmd_check().
 *
 * Layout of the NULL-terminated table: synopsis line, one-line summary,
 * longer description, an empty string as separator, then one or more lines
 * per option.
 */
const char * const cmd_check_usage[] = {
        /* synopsis and summary */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        /* long description */
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        /* options */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
12919
12920 int cmd_check(int argc, char **argv)
12921 {
12922         struct cache_tree root_cache;
12923         struct btrfs_root *root;
12924         struct btrfs_fs_info *info;
12925         u64 bytenr = 0;
12926         u64 subvolid = 0;
12927         u64 tree_root_bytenr = 0;
12928         u64 chunk_root_bytenr = 0;
12929         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12930         int ret = 0;
12931         int err = 0;
12932         u64 num;
12933         int init_csum_tree = 0;
12934         int readonly = 0;
12935         int clear_space_cache = 0;
12936         int qgroup_report = 0;
12937         int qgroups_repaired = 0;
12938         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12939         int force = 0;
12940
12941         while(1) {
12942                 int c;
12943                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12944                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12945                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12946                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
12947                         GETOPT_VAL_FORCE };
12948                 static const struct option long_options[] = {
12949                         { "super", required_argument, NULL, 's' },
12950                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12951                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12952                         { "init-csum-tree", no_argument, NULL,
12953                                 GETOPT_VAL_INIT_CSUM },
12954                         { "init-extent-tree", no_argument, NULL,
12955                                 GETOPT_VAL_INIT_EXTENT },
12956                         { "check-data-csum", no_argument, NULL,
12957                                 GETOPT_VAL_CHECK_CSUM },
12958                         { "backup", no_argument, NULL, 'b' },
12959                         { "subvol-extents", required_argument, NULL, 'E' },
12960                         { "qgroup-report", no_argument, NULL, 'Q' },
12961                         { "tree-root", required_argument, NULL, 'r' },
12962                         { "chunk-root", required_argument, NULL,
12963                                 GETOPT_VAL_CHUNK_TREE },
12964                         { "progress", no_argument, NULL, 'p' },
12965                         { "mode", required_argument, NULL,
12966                                 GETOPT_VAL_MODE },
12967                         { "clear-space-cache", required_argument, NULL,
12968                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12969                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
12970                         { NULL, 0, NULL, 0}
12971                 };
12972
12973                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12974                 if (c < 0)
12975                         break;
12976                 switch(c) {
12977                         case 'a': /* ignored */ break;
12978                         case 'b':
12979                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12980                                 break;
12981                         case 's':
12982                                 num = arg_strtou64(optarg);
12983                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12984                                         error(
12985                                         "super mirror should be less than %d",
12986                                                 BTRFS_SUPER_MIRROR_MAX);
12987                                         exit(1);
12988                                 }
12989                                 bytenr = btrfs_sb_offset(((int)num));
12990                                 printf("using SB copy %llu, bytenr %llu\n", num,
12991                                        (unsigned long long)bytenr);
12992                                 break;
12993                         case 'Q':
12994                                 qgroup_report = 1;
12995                                 break;
12996                         case 'E':
12997                                 subvolid = arg_strtou64(optarg);
12998                                 break;
12999                         case 'r':
13000                                 tree_root_bytenr = arg_strtou64(optarg);
13001                                 break;
13002                         case GETOPT_VAL_CHUNK_TREE:
13003                                 chunk_root_bytenr = arg_strtou64(optarg);
13004                                 break;
13005                         case 'p':
13006                                 ctx.progress_enabled = true;
13007                                 break;
13008                         case '?':
13009                         case 'h':
13010                                 usage(cmd_check_usage);
13011                         case GETOPT_VAL_REPAIR:
13012                                 printf("enabling repair mode\n");
13013                                 repair = 1;
13014                                 ctree_flags |= OPEN_CTREE_WRITES;
13015                                 break;
13016                         case GETOPT_VAL_READONLY:
13017                                 readonly = 1;
13018                                 break;
13019                         case GETOPT_VAL_INIT_CSUM:
13020                                 printf("Creating a new CRC tree\n");
13021                                 init_csum_tree = 1;
13022                                 repair = 1;
13023                                 ctree_flags |= OPEN_CTREE_WRITES;
13024                                 break;
13025                         case GETOPT_VAL_INIT_EXTENT:
13026                                 init_extent_tree = 1;
13027                                 ctree_flags |= (OPEN_CTREE_WRITES |
13028                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
13029                                 repair = 1;
13030                                 break;
13031                         case GETOPT_VAL_CHECK_CSUM:
13032                                 check_data_csum = 1;
13033                                 break;
13034                         case GETOPT_VAL_MODE:
13035                                 check_mode = parse_check_mode(optarg);
13036                                 if (check_mode == CHECK_MODE_UNKNOWN) {
13037                                         error("unknown mode: %s", optarg);
13038                                         exit(1);
13039                                 }
13040                                 break;
13041                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
13042                                 if (strcmp(optarg, "v1") == 0) {
13043                                         clear_space_cache = 1;
13044                                 } else if (strcmp(optarg, "v2") == 0) {
13045                                         clear_space_cache = 2;
13046                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13047                                 } else {
13048                                         error(
13049                 "invalid argument to --clear-space-cache, must be v1 or v2");
13050                                         exit(1);
13051                                 }
13052                                 ctree_flags |= OPEN_CTREE_WRITES;
13053                                 break;
13054                         case GETOPT_VAL_FORCE:
13055                                 force = 1;
13056                                 break;
13057                 }
13058         }
13059
13060         if (check_argc_exact(argc - optind, 1))
13061                 usage(cmd_check_usage);
13062
13063         if (ctx.progress_enabled) {
13064                 ctx.tp = TASK_NOTHING;
13065                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13066         }
13067
13068         /* This check is the only reason for --readonly to exist */
13069         if (readonly && repair) {
13070                 error("repair options are not compatible with --readonly");
13071                 exit(1);
13072         }
13073
13074         /*
13075          * Not supported yet
13076          */
13077         if (repair && check_mode == CHECK_MODE_LOWMEM) {
13078                 error("low memory mode doesn't support repair yet");
13079                 exit(1);
13080         }
13081
13082         radix_tree_init();
13083         cache_tree_init(&root_cache);
13084
13085         ret = check_mounted(argv[optind]);
13086         if (!force) {
13087                 if (ret < 0) {
13088                         error("could not check mount status: %s",
13089                                         strerror(-ret));
13090                         err |= !!ret;
13091                         goto err_out;
13092                 } else if (ret) {
13093                         error(
13094 "%s is currently mounted, use --force if you really intend to check the filesystem",
13095                                 argv[optind]);
13096                         ret = -EBUSY;
13097                         err |= !!ret;
13098                         goto err_out;
13099                 }
13100         } else {
13101                 if (repair) {
13102                         error("repair and --force is not yet supported");
13103                         ret = 1;
13104                         err |= !!ret;
13105                         goto err_out;
13106                 }
13107                 if (ret < 0) {
13108                         warning(
13109 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13110                                 argv[optind]);
13111                 } else if (ret) {
13112                         warning(
13113                         "filesystem mounted, continuing because of --force");
13114                 }
13115                 /* A block device is mounted in exclusive mode by kernel */
13116                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13117         }
13118
13119         /* only allow partial opening under repair mode */
13120         if (repair)
13121                 ctree_flags |= OPEN_CTREE_PARTIAL;
13122
13123         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13124                                   chunk_root_bytenr, ctree_flags);
13125         if (!info) {
13126                 error("cannot open file system");
13127                 ret = -EIO;
13128                 err |= !!ret;
13129                 goto err_out;
13130         }
13131
13132         global_info = info;
13133         root = info->fs_root;
13134         uuid_unparse(info->super_copy->fsid, uuidbuf);
13135
13136         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13137
13138         /*
13139          * Check the bare minimum before starting anything else that could rely
13140          * on it, namely the tree roots, any local consistency checks
13141          */
13142         if (!extent_buffer_uptodate(info->tree_root->node) ||
13143             !extent_buffer_uptodate(info->dev_root->node) ||
13144             !extent_buffer_uptodate(info->chunk_root->node)) {
13145                 error("critical roots corrupted, unable to check the filesystem");
13146                 err |= !!ret;
13147                 ret = -EIO;
13148                 goto close_out;
13149         }
13150
13151         if (clear_space_cache) {
13152                 ret = do_clear_free_space_cache(info, clear_space_cache);
13153                 err |= !!ret;
13154                 goto close_out;
13155         }
13156
13157         /*
13158          * repair mode will force us to commit transaction which
13159          * will make us fail to load log tree when mounting.
13160          */
13161         if (repair && btrfs_super_log_root(info->super_copy)) {
13162                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13163                 if (!ret) {
13164                         ret = 1;
13165                         err |= !!ret;
13166                         goto close_out;
13167                 }
13168                 ret = zero_log_tree(root);
13169                 err |= !!ret;
13170                 if (ret) {
13171                         error("failed to zero log tree: %d", ret);
13172                         goto close_out;
13173                 }
13174         }
13175
13176         if (qgroup_report) {
13177                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13178                        uuidbuf);
13179                 ret = qgroup_verify_all(info);
13180                 err |= !!ret;
13181                 if (ret == 0)
13182                         report_qgroups(1);
13183                 goto close_out;
13184         }
13185         if (subvolid) {
13186                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13187                        subvolid, argv[optind], uuidbuf);
13188                 ret = print_extent_state(info, subvolid);
13189                 err |= !!ret;
13190                 goto close_out;
13191         }
13192
13193         if (init_extent_tree || init_csum_tree) {
13194                 struct btrfs_trans_handle *trans;
13195
13196                 trans = btrfs_start_transaction(info->extent_root, 0);
13197                 if (IS_ERR(trans)) {
13198                         error("error starting transaction");
13199                         ret = PTR_ERR(trans);
13200                         err |= !!ret;
13201                         goto close_out;
13202                 }
13203
13204                 if (init_extent_tree) {
13205                         printf("Creating a new extent tree\n");
13206                         ret = reinit_extent_tree(trans, info);
13207                         err |= !!ret;
13208                         if (ret)
13209                                 goto close_out;
13210                 }
13211
13212                 if (init_csum_tree) {
13213                         printf("Reinitialize checksum tree\n");
13214                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13215                         if (ret) {
13216                                 error("checksum tree initialization failed: %d",
13217                                                 ret);
13218                                 ret = -EIO;
13219                                 err |= !!ret;
13220                                 goto close_out;
13221                         }
13222
13223                         ret = fill_csum_tree(trans, info->csum_root,
13224                                              init_extent_tree);
13225                         err |= !!ret;
13226                         if (ret) {
13227                                 error("checksum tree refilling failed: %d", ret);
13228                                 return -EIO;
13229                         }
13230                 }
13231                 /*
13232                  * Ok now we commit and run the normal fsck, which will add
13233                  * extent entries for all of the items it finds.
13234                  */
13235                 ret = btrfs_commit_transaction(trans, info->extent_root);
13236                 err |= !!ret;
13237                 if (ret)
13238                         goto close_out;
13239         }
13240         if (!extent_buffer_uptodate(info->extent_root->node)) {
13241                 error("critical: extent_root, unable to check the filesystem");
13242                 ret = -EIO;
13243                 err |= !!ret;
13244                 goto close_out;
13245         }
13246         if (!extent_buffer_uptodate(info->csum_root->node)) {
13247                 error("critical: csum_root, unable to check the filesystem");
13248                 ret = -EIO;
13249                 err |= !!ret;
13250                 goto close_out;
13251         }
13252
13253         ret = do_check_chunks_and_extents(info);
13254         err |= !!ret;
13255         if (ret)
13256                 error(
13257                 "errors found in extent allocation tree or chunk allocation");
13258
13259         ret = repair_root_items(info);
13260         err |= !!ret;
13261         if (ret < 0) {
13262                 error("failed to repair root items: %s", strerror(-ret));
13263                 goto close_out;
13264         }
13265         if (repair) {
13266                 fprintf(stderr, "Fixed %d roots.\n", ret);
13267                 ret = 0;
13268         } else if (ret > 0) {
13269                 fprintf(stderr,
13270                        "Found %d roots with an outdated root item.\n",
13271                        ret);
13272                 fprintf(stderr,
13273                         "Please run a filesystem check with the option --repair to fix them.\n");
13274                 ret = 1;
13275                 err |= !!ret;
13276                 goto close_out;
13277         }
13278
13279         if (!ctx.progress_enabled) {
13280                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13281                         fprintf(stderr, "checking free space tree\n");
13282                 else
13283                         fprintf(stderr, "checking free space cache\n");
13284         }
13285         ret = check_space_cache(root);
13286         err |= !!ret;
13287         if (ret) {
13288                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13289                         error("errors found in free space tree");
13290                 else
13291                         error("errors found in free space cache");
13292                 goto out;
13293         }
13294
13295         /*
13296          * We used to have to have these hole extents in between our real
13297          * extents so if we don't have this flag set we need to make sure there
13298          * are no gaps in the file extents for inodes, otherwise we can just
13299          * ignore it when this happens.
13300          */
13301         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13302         ret = do_check_fs_roots(info, &root_cache);
13303         err |= !!ret;
13304         if (ret) {
13305                 error("errors found in fs roots");
13306                 goto out;
13307         }
13308
13309         fprintf(stderr, "checking csums\n");
13310         ret = check_csums(root);
13311         err |= !!ret;
13312         if (ret) {
13313                 error("errors found in csum tree");
13314                 goto out;
13315         }
13316
13317         fprintf(stderr, "checking root refs\n");
13318         /* For low memory mode, check_fs_roots_v2 handles root refs */
13319         if (check_mode != CHECK_MODE_LOWMEM) {
13320                 ret = check_root_refs(root, &root_cache);
13321                 err |= !!ret;
13322                 if (ret) {
13323                         error("errors found in root refs");
13324                         goto out;
13325                 }
13326         }
13327
13328         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13329                 struct extent_buffer *eb;
13330
13331                 eb = list_first_entry(&root->fs_info->recow_ebs,
13332                                       struct extent_buffer, recow);
13333                 list_del_init(&eb->recow);
13334                 ret = recow_extent_buffer(root, eb);
13335                 err |= !!ret;
13336                 if (ret) {
13337                         error("fails to fix transid errors");
13338                         break;
13339                 }
13340         }
13341
13342         while (!list_empty(&delete_items)) {
13343                 struct bad_item *bad;
13344
13345                 bad = list_first_entry(&delete_items, struct bad_item, list);
13346                 list_del_init(&bad->list);
13347                 if (repair) {
13348                         ret = delete_bad_item(root, bad);
13349                         err |= !!ret;
13350                 }
13351                 free(bad);
13352         }
13353
13354         if (info->quota_enabled) {
13355                 fprintf(stderr, "checking quota groups\n");
13356                 ret = qgroup_verify_all(info);
13357                 err |= !!ret;
13358                 if (ret) {
13359                         error("failed to check quota groups");
13360                         goto out;
13361                 }
13362                 report_qgroups(0);
13363                 ret = repair_qgroups(info, &qgroups_repaired);
13364                 err |= !!ret;
13365                 if (err) {
13366                         error("failed to repair quota groups");
13367                         goto out;
13368                 }
13369                 ret = 0;
13370         }
13371
13372         if (!list_empty(&root->fs_info->recow_ebs)) {
13373                 error("transid errors in file system");
13374                 ret = 1;
13375                 err |= !!ret;
13376         }
13377 out:
13378         printf("found %llu bytes used, ",
13379                (unsigned long long)bytes_used);
13380         if (err)
13381                 printf("error(s) found\n");
13382         else
13383                 printf("no error found\n");
13384         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13385         printf("total tree bytes: %llu\n",
13386                (unsigned long long)total_btree_bytes);
13387         printf("total fs tree bytes: %llu\n",
13388                (unsigned long long)total_fs_tree_bytes);
13389         printf("total extent tree bytes: %llu\n",
13390                (unsigned long long)total_extent_tree_bytes);
13391         printf("btree space waste bytes: %llu\n",
13392                (unsigned long long)btree_space_waste);
13393         printf("file data blocks allocated: %llu\n referenced %llu\n",
13394                 (unsigned long long)data_bytes_allocated,
13395                 (unsigned long long)data_bytes_referenced);
13396
13397         free_qgroup_counts();
13398         free_root_recs_tree(&root_cache);
13399 close_out:
13400         close_ctree(root);
13401 err_out:
13402         if (ctx.progress_enabled)
13403                 task_deinit(ctx.info);
13404
13405         return err;
13406 }