ab24aaa63ed45a32a5f3be45064e8fc69ce79f6d
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases reported by the progress indicator.
 *
 * The values are used as indexes into task_position_string[] in
 * print_status_check(), so the order here has to match that table.
 * TASK_NOTHING has no string and makes print_status_check() return
 * without printing.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
53
/* State handed (as void *) to the progress callbacks, see print_status_check(). */
struct task_ctx {
        int progress_enabled;
        /* Phase currently reported; selects the text print_status_check() shows. */
        enum task_position tp;

        /* Handle passed to task_period_start() / task_period_wait(). */
        struct task_info *info;
};
60
/*
 * File-scope state of the checker.
 *
 * NOTE(review): the code that updates and reads these is outside this view;
 * judging by the names the u64 values are running totals gathered while
 * scanning and the int values are option flags -- confirm against the users.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Progress-indicator context, zero-initialized. */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementation selector.
 *
 * parse_check_mode() maps "lowmem" to CHECK_MODE_LOWMEM, "orig"/"original"
 * to CHECK_MODE_ORIGINAL and anything else to CHECK_MODE_UNKNOWN.
 * CHECK_MODE_DEFAULT is an alias of CHECK_MODE_ORIGINAL, not a distinct value.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header embedded (as member "node") at the start of struct
 * data_backref and struct tree_backref.
 */
struct extent_backref {
        /* List linkage, decoded by to_extent_backref(). */
        struct list_head list;
        /* Red-black tree linkage, decoded by rb_node_to_extent_backref(). */
        struct rb_node node;
        /* 1: container is a data_backref, 0: a tree_backref. */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        /* Ordered ahead of the parent/root key by compare_extent_backref(). */
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
103 {
104         return rb_entry(node, struct extent_backref, node);
105 }
106
/*
 * Back reference of a data extent.
 *
 * Sort order is defined by compare_data_backref(): parent/root first, then
 * (unless full_backref is set) owner and offset, and finally disk_bytenr
 * and bytes when both sides have a non-zero found_ref.
 */
struct data_backref {
        /* Common header; node.is_data is expected to be set. */
        struct extent_backref node;
        /* Both names share storage; comparisons use "parent" for either. */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /* Separate field from the one-bit node.found_ref in the header. */
        u32 found_ref;
};
121
/*
 * Error bits for fs/root tree item checks; one bit per condition so several
 * can be OR-ed into a single int. Bit 0 is not used by this set.
 *
 * NOTE(review): the users of these bits are outside this view.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
139
140 static inline struct data_backref* to_data_backref(struct extent_backref *back)
141 {
142         return container_of(back, struct data_backref, node);
143 }
144
145 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
146 {
147         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
148         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
149         struct data_backref *back1 = to_data_backref(ext1);
150         struct data_backref *back2 = to_data_backref(ext2);
151
152         WARN_ON(!ext1->is_data);
153         WARN_ON(!ext2->is_data);
154
155         /* parent and root are a union, so this covers both */
156         if (back1->parent > back2->parent)
157                 return 1;
158         if (back1->parent < back2->parent)
159                 return -1;
160
161         /* This is a full backref and the parents match. */
162         if (back1->node.full_backref)
163                 return 0;
164
165         if (back1->owner > back2->owner)
166                 return 1;
167         if (back1->owner < back2->owner)
168                 return -1;
169
170         if (back1->offset > back2->offset)
171                 return 1;
172         if (back1->offset < back2->offset)
173                 return -1;
174
175         if (back1->found_ref && back2->found_ref) {
176                 if (back1->disk_bytenr > back2->disk_bytenr)
177                         return 1;
178                 if (back1->disk_bytenr < back2->disk_bytenr)
179                         return -1;
180
181                 if (back1->bytes > back2->bytes)
182                         return 1;
183                 if (back1->bytes < back2->bytes)
184                         return -1;
185         }
186
187         return 0;
188 }
189
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head instead.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        /* Printed as "inode" by print_orphan_data_extents(). */
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
204
/*
 * Back reference of a tree block, ordered by compare_tree_backref() on
 * parent/root only.
 */
struct tree_backref {
        /* Common header; node.is_data is expected to be clear. */
        struct extent_backref node;
        /* Both names share storage; comparisons use "parent" for either. */
        union {
                u64 parent;
                u64 root;
        };
};
212
213 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
214 {
215         return container_of(back, struct tree_backref, node);
216 }
217
218 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
219 {
220         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
221         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
222         struct tree_backref *back1 = to_tree_backref(ext1);
223         struct tree_backref *back2 = to_tree_backref(ext2);
224
225         WARN_ON(ext1->is_data);
226         WARN_ON(ext2->is_data);
227
228         /* parent and root are a union, so this covers both */
229         if (back1->parent > back2->parent)
230                 return 1;
231         if (back1->parent < back2->parent)
232                 return -1;
233
234         return 0;
235 }
236
237 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
238 {
239         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
240         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
241
242         if (ext1->is_data > ext2->is_data)
243                 return 1;
244
245         if (ext1->is_data < ext2->is_data)
246                 return -1;
247
248         if (ext1->full_backref > ext2->full_backref)
249                 return 1;
250         if (ext1->full_backref < ext2->full_backref)
251                 return -1;
252
253         if (ext1->is_data)
254                 return compare_data_backref(node1, node2);
255         else
256                 return compare_tree_backref(node1, node2);
257 }
258
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * The value 2 does not fit in one bit, which is why that field is two bits
 * wide.
 */
enum { FLAG_UNSET = 2 };

/*
 * Per-extent bookkeeping record.
 *
 * NOTE(review): the code that fills and consumes this record is outside this
 * view; the member comments below only state what the declarations show.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct rb_root backref_tree;
        /* List linkage, decoded by to_extent_record(). */
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* Two bits so that FLAG_UNSET (2) can be stored. */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
289
290 static inline struct extent_record* to_extent_record(struct list_head *entry)
291 {
292         return container_of(entry, struct extent_record, list);
293 }
294
295 struct inode_backref {
296         struct list_head list;
297         unsigned int found_dir_item:1;
298         unsigned int found_dir_index:1;
299         unsigned int found_inode_ref:1;
300         u8 filetype;
301         u8 ref_type;
302         int errors;
303         u64 dir;
304         u64 index;
305         u16 namelen;
306         char name[0];
307 };
308
309 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
310 {
311         return list_entry(entry, struct inode_backref, list);
312 }
313
/*
 * Snapshot of selected root item values, kept on a list.
 *
 * NOTE(review): producers and consumers are outside this view; presumably
 * the members mirror the same-named fields of a root item -- confirm.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        struct btrfs_key drop_key;
};
323
/*
 * Error bits stored in the "errors" member of a backref; decoded into text
 * by print_ref_error().
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
337
/*
 * A hole in a file, covering the byte range [start, start + len).
 * Holes live in an rb tree ordered by compare_hole() and are kept merged by
 * add_file_extent_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
343
/*
 * Everything collected about one inode.
 *
 * clone_inode_rec() produces a deep copy (backrefs, orphan extents and holes
 * are duplicated, refs is reset to 1); print_inode_error() reports the
 * "errors" mask.
 */
struct inode_record {
        /* List of struct inode_backref. */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* Bit mask of I_ERR_* values. */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        u64 extent_start;
        u64 extent_end;
        /* rb tree of struct file_extent_hole. */
        struct rb_root holes;
        /* List of struct orphan_data_extent. */
        struct list_head orphan_extents;

        u32 refs;
};
371
/*
 * Error bits stored in inode_record::errors; decoded into text by
 * print_inode_error().
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
387
388 struct root_backref {
389         struct list_head list;
390         unsigned int found_dir_item:1;
391         unsigned int found_dir_index:1;
392         unsigned int found_back_ref:1;
393         unsigned int found_forward_ref:1;
394         unsigned int reachable:1;
395         int errors;
396         u64 ref_root;
397         u64 dir;
398         u64 index;
399         u16 namelen;
400         char name[0];
401 };
402
403 static inline struct root_backref* to_root_backref(struct list_head *entry)
404 {
405         return list_entry(entry, struct root_backref, list);
406 }
407
/*
 * Per-root record.
 * NOTE(review): users are outside this view; "backrefs" presumably holds
 * struct root_backref entries -- confirm.
 */
struct root_record {
        struct list_head backrefs;
        /* Linkage for storing the record in a cache tree. */
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
415
/* Cache-tree entry that carries an untyped payload pointer. */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
420
/*
 * Cache-tree entry holding its own root and inode cache trees.
 * NOTE(review): presumably tracks a tree block shared between roots during
 * the fs-tree walk (see struct walk_control) -- confirm against the users.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
428
/* A block described by its start and size (note: size is only 32 bits). */
struct block_info {
        u64 start;
        u32 size;
};
433
/*
 * State of a tree walk: a cache tree of shared nodes plus one shared_node
 * slot per tree level (BTRFS_MAX_LEVEL slots).
 * NOTE(review): how active_node and root_level index "nodes" is decided by
 * code outside this view.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
440
/* List entry identifying an item by its key and the id of the root it is in. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
446
/*
 * List entry describing a (bytenr, bytes) extent candidate with counters.
 * NOTE(review): the meaning of count/broken is set by code outside this view.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
454
/* Information gathered about one tree root, stored in a cache tree. */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        /* Linkage for the cache tree holding these records. */
        struct cache_extent cache_extent;
};
464
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag must occupy its own bit so that OR-ed results stay
 * distinguishable. CROSSING_STRIPE_BOUNDARY used to share bit 4 with
 * REFERENCER_MISMATCH; it now uses the first free bit and the other values
 * are left untouched.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8) /* Chunk type mismatch */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
481
482 static void *print_status_check(void *p)
483 {
484         struct task_ctx *priv = p;
485         const char work_indicator[] = { '.', 'o', 'O', 'o' };
486         uint32_t count = 0;
487         static char *task_position_string[] = {
488                 "checking extents",
489                 "checking free space cache",
490                 "checking fs roots",
491         };
492
493         task_period_start(priv->info, 1000 /* 1s */);
494
495         if (priv->tp == TASK_NOTHING)
496                 return NULL;
497
498         while (1) {
499                 printf("%s [%c]\r", task_position_string[priv->tp],
500                                 work_indicator[count % 4]);
501                 count++;
502                 fflush(stdout);
503                 task_period_wait(priv->info);
504         }
505         return NULL;
506 }
507
/*
 * Progress-indicator epilogue: emit a newline on stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
515
516 static enum btrfs_check_mode parse_check_mode(const char *str)
517 {
518         if (strcmp(str, "lowmem") == 0)
519                 return CHECK_MODE_LOWMEM;
520         if (strcmp(str, "orig") == 0)
521                 return CHECK_MODE_ORIGINAL;
522         if (strcmp(str, "original") == 0)
523                 return CHECK_MODE_ORIGINAL;
524
525         return CHECK_MODE_UNKNOWN;
526 }
527
528 /* Compatible function to allow reuse of old codes */
529 static u64 first_extent_gap(struct rb_root *holes)
530 {
531         struct file_extent_hole *hole;
532
533         if (RB_EMPTY_ROOT(holes))
534                 return (u64)-1;
535
536         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
537         return hole->start;
538 }
539
540 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
541 {
542         struct file_extent_hole *hole1;
543         struct file_extent_hole *hole2;
544
545         hole1 = rb_entry(node1, struct file_extent_hole, node);
546         hole2 = rb_entry(node2, struct file_extent_hole, node);
547
548         if (hole1->start > hole2->start)
549                 return -1;
550         if (hole1->start < hole2->start)
551                 return 1;
552         /* Now hole1->start == hole2->start */
553         if (hole1->len >= hole2->len)
554                 /*
555                  * Hole 1 will be merge center
556                  * Same hole will be merged later
557                  */
558                 return -1;
559         /* Hole 2 will be merge center */
560         return 1;
561 }
562
563 /*
564  * Add a hole to the record
565  *
566  * This will do hole merge for copy_file_extent_holes(),
567  * which will ensure there won't be continuous holes.
568  */
569 static int add_file_extent_hole(struct rb_root *holes,
570                                 u64 start, u64 len)
571 {
572         struct file_extent_hole *hole;
573         struct file_extent_hole *prev = NULL;
574         struct file_extent_hole *next = NULL;
575
576         hole = malloc(sizeof(*hole));
577         if (!hole)
578                 return -ENOMEM;
579         hole->start = start;
580         hole->len = len;
581         /* Since compare will not return 0, no -EEXIST will happen */
582         rb_insert(holes, &hole->node, compare_hole);
583
584         /* simple merge with previous hole */
585         if (rb_prev(&hole->node))
586                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
587                                 node);
588         if (prev && prev->start + prev->len >= hole->start) {
589                 hole->len = hole->start + hole->len - prev->start;
590                 hole->start = prev->start;
591                 rb_erase(&prev->node, holes);
592                 free(prev);
593                 prev = NULL;
594         }
595
596         /* iterate merge with next holes */
597         while (1) {
598                 if (!rb_next(&hole->node))
599                         break;
600                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
601                                         node);
602                 if (hole->start + hole->len >= next->start) {
603                         if (hole->start + hole->len <= next->start + next->len)
604                                 hole->len = next->start + next->len -
605                                             hole->start;
606                         rb_erase(&next->node, holes);
607                         free(next);
608                         next = NULL;
609                 } else
610                         break;
611         }
612         return 0;
613 }
614
615 static int compare_hole_range(struct rb_node *node, void *data)
616 {
617         struct file_extent_hole *hole;
618         u64 start;
619
620         hole = (struct file_extent_hole *)data;
621         start = hole->start;
622
623         hole = rb_entry(node, struct file_extent_hole, node);
624         if (start < hole->start)
625                 return -1;
626         if (start >= hole->start && start < hole->start + hole->len)
627                 return 0;
628         return 1;
629 }
630
631 /*
632  * Delete a hole in the record
633  *
634  * This will do the hole split and is much restrict than add.
635  */
636 static int del_file_extent_hole(struct rb_root *holes,
637                                 u64 start, u64 len)
638 {
639         struct file_extent_hole *hole;
640         struct file_extent_hole tmp;
641         u64 prev_start = 0;
642         u64 prev_len = 0;
643         u64 next_start = 0;
644         u64 next_len = 0;
645         struct rb_node *node;
646         int have_prev = 0;
647         int have_next = 0;
648         int ret = 0;
649
650         tmp.start = start;
651         tmp.len = len;
652         node = rb_search(holes, &tmp, compare_hole_range, NULL);
653         if (!node)
654                 return -EEXIST;
655         hole = rb_entry(node, struct file_extent_hole, node);
656         if (start + len > hole->start + hole->len)
657                 return -EEXIST;
658
659         /*
660          * Now there will be no overlap, delete the hole and re-add the
661          * split(s) if they exists.
662          */
663         if (start > hole->start) {
664                 prev_start = hole->start;
665                 prev_len = start - hole->start;
666                 have_prev = 1;
667         }
668         if (hole->start + hole->len > start + len) {
669                 next_start = start + len;
670                 next_len = hole->start + hole->len - start - len;
671                 have_next = 1;
672         }
673         rb_erase(node, holes);
674         free(hole);
675         if (have_prev) {
676                 ret = add_file_extent_hole(holes, prev_start, prev_len);
677                 if (ret < 0)
678                         return ret;
679         }
680         if (have_next) {
681                 ret = add_file_extent_hole(holes, next_start, next_len);
682                 if (ret < 0)
683                         return ret;
684         }
685         return 0;
686 }
687
688 static int copy_file_extent_holes(struct rb_root *dst,
689                                   struct rb_root *src)
690 {
691         struct file_extent_hole *hole;
692         struct rb_node *node;
693         int ret = 0;
694
695         node = rb_first(src);
696         while (node) {
697                 hole = rb_entry(node, struct file_extent_hole, node);
698                 ret = add_file_extent_hole(dst, hole->start, hole->len);
699                 if (ret)
700                         break;
701                 node = rb_next(node);
702         }
703         return ret;
704 }
705
706 static void free_file_extent_holes(struct rb_root *holes)
707 {
708         struct rb_node *node;
709         struct file_extent_hole *hole;
710
711         node = rb_first(holes);
712         while (node) {
713                 hole = rb_entry(node, struct file_extent_hole, node);
714                 rb_erase(node, holes);
715                 free(hole);
716                 node = rb_first(holes);
717         }
718 }
719
/* Forward declaration; the definition appears later in this file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
721
722 static void record_root_in_trans(struct btrfs_trans_handle *trans,
723                                  struct btrfs_root *root)
724 {
725         if (root->last_trans != trans->transid) {
726                 root->track_dirty = 1;
727                 root->last_trans = trans->transid;
728                 root->commit_root = root->node;
729                 extent_buffer_get(root->node);
730         }
731 }
732
733 static u8 imode_to_type(u32 imode)
734 {
735 #define S_SHIFT 12
736         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
737                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
738                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
739                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
740                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
741                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
742                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
743                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
744         };
745
746         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
747 #undef S_SHIFT
748 }
749
750 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
751 {
752         struct device_record *rec1;
753         struct device_record *rec2;
754
755         rec1 = rb_entry(node1, struct device_record, node);
756         rec2 = rb_entry(node2, struct device_record, node);
757         if (rec1->devid > rec2->devid)
758                 return -1;
759         else if (rec1->devid < rec2->devid)
760                 return 1;
761         else
762                 return 0;
763 }
764
765 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
766 {
767         struct inode_record *rec;
768         struct inode_backref *backref;
769         struct inode_backref *orig;
770         struct inode_backref *tmp;
771         struct orphan_data_extent *src_orphan;
772         struct orphan_data_extent *dst_orphan;
773         struct rb_node *rb;
774         size_t size;
775         int ret;
776
777         rec = malloc(sizeof(*rec));
778         if (!rec)
779                 return ERR_PTR(-ENOMEM);
780         memcpy(rec, orig_rec, sizeof(*rec));
781         rec->refs = 1;
782         INIT_LIST_HEAD(&rec->backrefs);
783         INIT_LIST_HEAD(&rec->orphan_extents);
784         rec->holes = RB_ROOT;
785
786         list_for_each_entry(orig, &orig_rec->backrefs, list) {
787                 size = sizeof(*orig) + orig->namelen + 1;
788                 backref = malloc(size);
789                 if (!backref) {
790                         ret = -ENOMEM;
791                         goto cleanup;
792                 }
793                 memcpy(backref, orig, size);
794                 list_add_tail(&backref->list, &rec->backrefs);
795         }
796         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
797                 dst_orphan = malloc(sizeof(*dst_orphan));
798                 if (!dst_orphan) {
799                         ret = -ENOMEM;
800                         goto cleanup;
801                 }
802                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
803                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
804         }
805         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
806         if (ret < 0)
807                 goto cleanup_rb;
808
809         return rec;
810
811 cleanup_rb:
812         rb = rb_first(&rec->holes);
813         while (rb) {
814                 struct file_extent_hole *hole;
815
816                 hole = rb_entry(rb, struct file_extent_hole, node);
817                 rb = rb_next(rb);
818                 free(hole);
819         }
820
821 cleanup:
822         if (!list_empty(&rec->backrefs))
823                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
824                         list_del(&orig->list);
825                         free(orig);
826                 }
827
828         if (!list_empty(&rec->orphan_extents))
829                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
830                         list_del(&orig->list);
831                         free(orig);
832                 }
833
834         free(rec);
835
836         return ERR_PTR(ret);
837 }
838
839 static void print_orphan_data_extents(struct list_head *orphan_extents,
840                                       u64 objectid)
841 {
842         struct orphan_data_extent *orphan;
843
844         if (list_empty(orphan_extents))
845                 return;
846         printf("The following data extent is lost in tree %llu:\n",
847                objectid);
848         list_for_each_entry(orphan, orphan_extents, list) {
849                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
850                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
851                        orphan->disk_len);
852         }
853 }
854
855 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
856 {
857         u64 root_objectid = root->root_key.objectid;
858         int errors = rec->errors;
859
860         if (!errors)
861                 return;
862         /* reloc root errors, we print its corresponding fs root objectid*/
863         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
864                 root_objectid = root->root_key.offset;
865                 fprintf(stderr, "reloc");
866         }
867         fprintf(stderr, "root %llu inode %llu errors %x",
868                 (unsigned long long) root_objectid,
869                 (unsigned long long) rec->ino, rec->errors);
870
871         if (errors & I_ERR_NO_INODE_ITEM)
872                 fprintf(stderr, ", no inode item");
873         if (errors & I_ERR_NO_ORPHAN_ITEM)
874                 fprintf(stderr, ", no orphan item");
875         if (errors & I_ERR_DUP_INODE_ITEM)
876                 fprintf(stderr, ", dup inode item");
877         if (errors & I_ERR_DUP_DIR_INDEX)
878                 fprintf(stderr, ", dup dir index");
879         if (errors & I_ERR_ODD_DIR_ITEM)
880                 fprintf(stderr, ", odd dir item");
881         if (errors & I_ERR_ODD_FILE_EXTENT)
882                 fprintf(stderr, ", odd file extent");
883         if (errors & I_ERR_BAD_FILE_EXTENT)
884                 fprintf(stderr, ", bad file extent");
885         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
886                 fprintf(stderr, ", file extent overlap");
887         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
888                 fprintf(stderr, ", file extent discount");
889         if (errors & I_ERR_DIR_ISIZE_WRONG)
890                 fprintf(stderr, ", dir isize wrong");
891         if (errors & I_ERR_FILE_NBYTES_WRONG)
892                 fprintf(stderr, ", nbytes wrong");
893         if (errors & I_ERR_ODD_CSUM_ITEM)
894                 fprintf(stderr, ", odd csum item");
895         if (errors & I_ERR_SOME_CSUM_MISSING)
896                 fprintf(stderr, ", some csum missing");
897         if (errors & I_ERR_LINK_COUNT_WRONG)
898                 fprintf(stderr, ", link count wrong");
899         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
900                 fprintf(stderr, ", orphan file extent");
901         fprintf(stderr, "\n");
902         /* Print the orphan extents if needed */
903         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
904                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
905
906         /* Print the holes if needed */
907         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
908                 struct file_extent_hole *hole;
909                 struct rb_node *node;
910                 int found = 0;
911
912                 node = rb_first(&rec->holes);
913                 fprintf(stderr, "Found file extent holes:\n");
914                 while (node) {
915                         found = 1;
916                         hole = rb_entry(node, struct file_extent_hole, node);
917                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
918                                 hole->start, hole->len);
919                         node = rb_next(node);
920                 }
921                 if (!found)
922                         fprintf(stderr, "\tstart: 0, len: %llu\n",
923                                 round_up(rec->isize,
924                                          root->fs_info->sectorsize));
925         }
926 }
927
928 static void print_ref_error(int errors)
929 {
930         if (errors & REF_ERR_NO_DIR_ITEM)
931                 fprintf(stderr, ", no dir item");
932         if (errors & REF_ERR_NO_DIR_INDEX)
933                 fprintf(stderr, ", no dir index");
934         if (errors & REF_ERR_NO_INODE_REF)
935                 fprintf(stderr, ", no inode ref");
936         if (errors & REF_ERR_DUP_DIR_ITEM)
937                 fprintf(stderr, ", dup dir item");
938         if (errors & REF_ERR_DUP_DIR_INDEX)
939                 fprintf(stderr, ", dup dir index");
940         if (errors & REF_ERR_DUP_INODE_REF)
941                 fprintf(stderr, ", dup inode ref");
942         if (errors & REF_ERR_INDEX_UNMATCH)
943                 fprintf(stderr, ", index mismatch");
944         if (errors & REF_ERR_FILETYPE_UNMATCH)
945                 fprintf(stderr, ", filetype mismatch");
946         if (errors & REF_ERR_NAME_TOO_LONG)
947                 fprintf(stderr, ", name too long");
948         if (errors & REF_ERR_NO_ROOT_REF)
949                 fprintf(stderr, ", no root ref");
950         if (errors & REF_ERR_NO_ROOT_BACKREF)
951                 fprintf(stderr, ", no root backref");
952         if (errors & REF_ERR_DUP_ROOT_REF)
953                 fprintf(stderr, ", dup root ref");
954         if (errors & REF_ERR_DUP_ROOT_BACKREF)
955                 fprintf(stderr, ", dup root backref");
956         fprintf(stderr, "\n");
957 }
958
959 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
960                                           u64 ino, int mod)
961 {
962         struct ptr_node *node;
963         struct cache_extent *cache;
964         struct inode_record *rec = NULL;
965         int ret;
966
967         cache = lookup_cache_extent(inode_cache, ino, 1);
968         if (cache) {
969                 node = container_of(cache, struct ptr_node, cache);
970                 rec = node->data;
971                 if (mod && rec->refs > 1) {
972                         node->data = clone_inode_rec(rec);
973                         if (IS_ERR(node->data))
974                                 return node->data;
975                         rec->refs--;
976                         rec = node->data;
977                 }
978         } else if (mod) {
979                 rec = calloc(1, sizeof(*rec));
980                 if (!rec)
981                         return ERR_PTR(-ENOMEM);
982                 rec->ino = ino;
983                 rec->extent_start = (u64)-1;
984                 rec->refs = 1;
985                 INIT_LIST_HEAD(&rec->backrefs);
986                 INIT_LIST_HEAD(&rec->orphan_extents);
987                 rec->holes = RB_ROOT;
988
989                 node = malloc(sizeof(*node));
990                 if (!node) {
991                         free(rec);
992                         return ERR_PTR(-ENOMEM);
993                 }
994                 node->cache.start = ino;
995                 node->cache.size = 1;
996                 node->data = rec;
997
998                 if (ino == BTRFS_FREE_INO_OBJECTID)
999                         rec->found_link = 1;
1000
1001                 ret = insert_cache_extent(inode_cache, &node->cache);
1002                 if (ret)
1003                         return ERR_PTR(-EEXIST);
1004         }
1005         return rec;
1006 }
1007
1008 static void free_orphan_data_extents(struct list_head *orphan_extents)
1009 {
1010         struct orphan_data_extent *orphan;
1011
1012         while (!list_empty(orphan_extents)) {
1013                 orphan = list_entry(orphan_extents->next,
1014                                     struct orphan_data_extent, list);
1015                 list_del(&orphan->list);
1016                 free(orphan);
1017         }
1018 }
1019
1020 static void free_inode_rec(struct inode_record *rec)
1021 {
1022         struct inode_backref *backref;
1023
1024         if (--rec->refs > 0)
1025                 return;
1026
1027         while (!list_empty(&rec->backrefs)) {
1028                 backref = to_inode_backref(rec->backrefs.next);
1029                 list_del(&backref->list);
1030                 free(backref);
1031         }
1032         free_orphan_data_extents(&rec->orphan_extents);
1033         free_file_extent_holes(&rec->holes);
1034         free(rec);
1035 }
1036
1037 static int can_free_inode_rec(struct inode_record *rec)
1038 {
1039         if (!rec->errors && rec->checked && rec->found_inode_item &&
1040             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1041                 return 1;
1042         return 0;
1043 }
1044
1045 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1046                                  struct inode_record *rec)
1047 {
1048         struct cache_extent *cache;
1049         struct inode_backref *tmp, *backref;
1050         struct ptr_node *node;
1051         u8 filetype;
1052
1053         if (!rec->found_inode_item)
1054                 return;
1055
1056         filetype = imode_to_type(rec->imode);
1057         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1058                 if (backref->found_dir_item && backref->found_dir_index) {
1059                         if (backref->filetype != filetype)
1060                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1061                         if (!backref->errors && backref->found_inode_ref &&
1062                             rec->nlink == rec->found_link) {
1063                                 list_del(&backref->list);
1064                                 free(backref);
1065                         }
1066                 }
1067         }
1068
1069         if (!rec->checked || rec->merging)
1070                 return;
1071
1072         if (S_ISDIR(rec->imode)) {
1073                 if (rec->found_size != rec->isize)
1074                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1075                 if (rec->found_file_extent)
1076                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1077         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1078                 if (rec->found_dir_item)
1079                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1080                 if (rec->found_size != rec->nbytes)
1081                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1082                 if (rec->nlink > 0 && !no_holes &&
1083                     (rec->extent_end < rec->isize ||
1084                      first_extent_gap(&rec->holes) < rec->isize))
1085                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1086         }
1087
1088         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1089                 if (rec->found_csum_item && rec->nodatasum)
1090                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1091                 if (rec->some_csum_missing && !rec->nodatasum)
1092                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1093         }
1094
1095         BUG_ON(rec->refs != 1);
1096         if (can_free_inode_rec(rec)) {
1097                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1098                 node = container_of(cache, struct ptr_node, cache);
1099                 BUG_ON(node->data != rec);
1100                 remove_cache_extent(inode_cache, &node->cache);
1101                 free(node);
1102                 free_inode_rec(rec);
1103         }
1104 }
1105
1106 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1107 {
1108         struct btrfs_path path;
1109         struct btrfs_key key;
1110         int ret;
1111
1112         key.objectid = BTRFS_ORPHAN_OBJECTID;
1113         key.type = BTRFS_ORPHAN_ITEM_KEY;
1114         key.offset = ino;
1115
1116         btrfs_init_path(&path);
1117         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1118         btrfs_release_path(&path);
1119         if (ret > 0)
1120                 ret = -ENOENT;
1121         return ret;
1122 }
1123
1124 static int process_inode_item(struct extent_buffer *eb,
1125                               int slot, struct btrfs_key *key,
1126                               struct shared_node *active_node)
1127 {
1128         struct inode_record *rec;
1129         struct btrfs_inode_item *item;
1130
1131         rec = active_node->current;
1132         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1133         if (rec->found_inode_item) {
1134                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1135                 return 1;
1136         }
1137         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1138         rec->nlink = btrfs_inode_nlink(eb, item);
1139         rec->isize = btrfs_inode_size(eb, item);
1140         rec->nbytes = btrfs_inode_nbytes(eb, item);
1141         rec->imode = btrfs_inode_mode(eb, item);
1142         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1143                 rec->nodatasum = 1;
1144         rec->found_inode_item = 1;
1145         if (rec->nlink == 0)
1146                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1147         maybe_free_inode_rec(&active_node->inode_cache, rec);
1148         return 0;
1149 }
1150
1151 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1152                                                 const char *name,
1153                                                 int namelen, u64 dir)
1154 {
1155         struct inode_backref *backref;
1156
1157         list_for_each_entry(backref, &rec->backrefs, list) {
1158                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1159                         break;
1160                 if (backref->dir != dir || backref->namelen != namelen)
1161                         continue;
1162                 if (memcmp(name, backref->name, namelen))
1163                         continue;
1164                 return backref;
1165         }
1166
1167         backref = malloc(sizeof(*backref) + namelen + 1);
1168         if (!backref)
1169                 return NULL;
1170         memset(backref, 0, sizeof(*backref));
1171         backref->dir = dir;
1172         backref->namelen = namelen;
1173         memcpy(backref->name, name, namelen);
1174         backref->name[namelen] = '\0';
1175         list_add_tail(&backref->list, &rec->backrefs);
1176         return backref;
1177 }
1178
1179 static int add_inode_backref(struct cache_tree *inode_cache,
1180                              u64 ino, u64 dir, u64 index,
1181                              const char *name, int namelen,
1182                              u8 filetype, u8 itemtype, int errors)
1183 {
1184         struct inode_record *rec;
1185         struct inode_backref *backref;
1186
1187         rec = get_inode_rec(inode_cache, ino, 1);
1188         BUG_ON(IS_ERR(rec));
1189         backref = get_inode_backref(rec, name, namelen, dir);
1190         BUG_ON(!backref);
1191         if (errors)
1192                 backref->errors |= errors;
1193         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1194                 if (backref->found_dir_index)
1195                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1196                 if (backref->found_inode_ref && backref->index != index)
1197                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1198                 if (backref->found_dir_item && backref->filetype != filetype)
1199                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1200
1201                 backref->index = index;
1202                 backref->filetype = filetype;
1203                 backref->found_dir_index = 1;
1204         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1205                 rec->found_link++;
1206                 if (backref->found_dir_item)
1207                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1208                 if (backref->found_dir_index && backref->filetype != filetype)
1209                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1210
1211                 backref->filetype = filetype;
1212                 backref->found_dir_item = 1;
1213         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1214                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1215                 if (backref->found_inode_ref)
1216                         backref->errors |= REF_ERR_DUP_INODE_REF;
1217                 if (backref->found_dir_index && backref->index != index)
1218                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1219                 else
1220                         backref->index = index;
1221
1222                 backref->ref_type = itemtype;
1223                 backref->found_inode_ref = 1;
1224         } else {
1225                 BUG_ON(1);
1226         }
1227
1228         maybe_free_inode_rec(inode_cache, rec);
1229         return 0;
1230 }
1231
1232 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1233                             struct cache_tree *dst_cache)
1234 {
1235         struct inode_backref *backref;
1236         u32 dir_count = 0;
1237         int ret = 0;
1238
1239         dst->merging = 1;
1240         list_for_each_entry(backref, &src->backrefs, list) {
1241                 if (backref->found_dir_index) {
1242                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1243                                         backref->index, backref->name,
1244                                         backref->namelen, backref->filetype,
1245                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1246                 }
1247                 if (backref->found_dir_item) {
1248                         dir_count++;
1249                         add_inode_backref(dst_cache, dst->ino,
1250                                         backref->dir, 0, backref->name,
1251                                         backref->namelen, backref->filetype,
1252                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1253                 }
1254                 if (backref->found_inode_ref) {
1255                         add_inode_backref(dst_cache, dst->ino,
1256                                         backref->dir, backref->index,
1257                                         backref->name, backref->namelen, 0,
1258                                         backref->ref_type, backref->errors);
1259                 }
1260         }
1261
1262         if (src->found_dir_item)
1263                 dst->found_dir_item = 1;
1264         if (src->found_file_extent)
1265                 dst->found_file_extent = 1;
1266         if (src->found_csum_item)
1267                 dst->found_csum_item = 1;
1268         if (src->some_csum_missing)
1269                 dst->some_csum_missing = 1;
1270         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1271                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1272                 if (ret < 0)
1273                         return ret;
1274         }
1275
1276         BUG_ON(src->found_link < dir_count);
1277         dst->found_link += src->found_link - dir_count;
1278         dst->found_size += src->found_size;
1279         if (src->extent_start != (u64)-1) {
1280                 if (dst->extent_start == (u64)-1) {
1281                         dst->extent_start = src->extent_start;
1282                         dst->extent_end = src->extent_end;
1283                 } else {
1284                         if (dst->extent_end > src->extent_start)
1285                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1286                         else if (dst->extent_end < src->extent_start) {
1287                                 ret = add_file_extent_hole(&dst->holes,
1288                                         dst->extent_end,
1289                                         src->extent_start - dst->extent_end);
1290                         }
1291                         if (dst->extent_end < src->extent_end)
1292                                 dst->extent_end = src->extent_end;
1293                 }
1294         }
1295
1296         dst->errors |= src->errors;
1297         if (src->found_inode_item) {
1298                 if (!dst->found_inode_item) {
1299                         dst->nlink = src->nlink;
1300                         dst->isize = src->isize;
1301                         dst->nbytes = src->nbytes;
1302                         dst->imode = src->imode;
1303                         dst->nodatasum = src->nodatasum;
1304                         dst->found_inode_item = 1;
1305                 } else {
1306                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1307                 }
1308         }
1309         dst->merging = 0;
1310
1311         return 0;
1312 }
1313
1314 static int splice_shared_node(struct shared_node *src_node,
1315                               struct shared_node *dst_node)
1316 {
1317         struct cache_extent *cache;
1318         struct ptr_node *node, *ins;
1319         struct cache_tree *src, *dst;
1320         struct inode_record *rec, *conflict;
1321         u64 current_ino = 0;
1322         int splice = 0;
1323         int ret;
1324
1325         if (--src_node->refs == 0)
1326                 splice = 1;
1327         if (src_node->current)
1328                 current_ino = src_node->current->ino;
1329
1330         src = &src_node->root_cache;
1331         dst = &dst_node->root_cache;
1332 again:
1333         cache = search_cache_extent(src, 0);
1334         while (cache) {
1335                 node = container_of(cache, struct ptr_node, cache);
1336                 rec = node->data;
1337                 cache = next_cache_extent(cache);
1338
1339                 if (splice) {
1340                         remove_cache_extent(src, &node->cache);
1341                         ins = node;
1342                 } else {
1343                         ins = malloc(sizeof(*ins));
1344                         BUG_ON(!ins);
1345                         ins->cache.start = node->cache.start;
1346                         ins->cache.size = node->cache.size;
1347                         ins->data = rec;
1348                         rec->refs++;
1349                 }
1350                 ret = insert_cache_extent(dst, &ins->cache);
1351                 if (ret == -EEXIST) {
1352                         conflict = get_inode_rec(dst, rec->ino, 1);
1353                         BUG_ON(IS_ERR(conflict));
1354                         merge_inode_recs(rec, conflict, dst);
1355                         if (rec->checked) {
1356                                 conflict->checked = 1;
1357                                 if (dst_node->current == conflict)
1358                                         dst_node->current = NULL;
1359                         }
1360                         maybe_free_inode_rec(dst, conflict);
1361                         free_inode_rec(rec);
1362                         free(ins);
1363                 } else {
1364                         BUG_ON(ret);
1365                 }
1366         }
1367
1368         if (src == &src_node->root_cache) {
1369                 src = &src_node->inode_cache;
1370                 dst = &dst_node->inode_cache;
1371                 goto again;
1372         }
1373
1374         if (current_ino > 0 && (!dst_node->current ||
1375             current_ino > dst_node->current->ino)) {
1376                 if (dst_node->current) {
1377                         dst_node->current->checked = 1;
1378                         maybe_free_inode_rec(dst, dst_node->current);
1379                 }
1380                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1381                 BUG_ON(IS_ERR(dst_node->current));
1382         }
1383         return 0;
1384 }
1385
1386 static void free_inode_ptr(struct cache_extent *cache)
1387 {
1388         struct ptr_node *node;
1389         struct inode_record *rec;
1390
1391         node = container_of(cache, struct ptr_node, cache);
1392         rec = node->data;
1393         free_inode_rec(rec);
1394         free(node);
1395 }
1396
1397 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1398
1399 static struct shared_node *find_shared_node(struct cache_tree *shared,
1400                                             u64 bytenr)
1401 {
1402         struct cache_extent *cache;
1403         struct shared_node *node;
1404
1405         cache = lookup_cache_extent(shared, bytenr, 1);
1406         if (cache) {
1407                 node = container_of(cache, struct shared_node, cache);
1408                 return node;
1409         }
1410         return NULL;
1411 }
1412
1413 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1414 {
1415         int ret;
1416         struct shared_node *node;
1417
1418         node = calloc(1, sizeof(*node));
1419         if (!node)
1420                 return -ENOMEM;
1421         node->cache.start = bytenr;
1422         node->cache.size = 1;
1423         cache_tree_init(&node->root_cache);
1424         cache_tree_init(&node->inode_cache);
1425         node->refs = refs;
1426
1427         ret = insert_cache_extent(shared, &node->cache);
1428
1429         return ret;
1430 }
1431
1432 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1433                              struct walk_control *wc, int level)
1434 {
1435         struct shared_node *node;
1436         struct shared_node *dest;
1437         int ret;
1438
1439         if (level == wc->active_node)
1440                 return 0;
1441
1442         BUG_ON(wc->active_node <= level);
1443         node = find_shared_node(&wc->shared, bytenr);
1444         if (!node) {
1445                 ret = add_shared_node(&wc->shared, bytenr, refs);
1446                 BUG_ON(ret);
1447                 node = find_shared_node(&wc->shared, bytenr);
1448                 wc->nodes[level] = node;
1449                 wc->active_node = level;
1450                 return 0;
1451         }
1452
1453         if (wc->root_level == wc->active_node &&
1454             btrfs_root_refs(&root->root_item) == 0) {
1455                 if (--node->refs == 0) {
1456                         free_inode_recs_tree(&node->root_cache);
1457                         free_inode_recs_tree(&node->inode_cache);
1458                         remove_cache_extent(&wc->shared, &node->cache);
1459                         free(node);
1460                 }
1461                 return 1;
1462         }
1463
1464         dest = wc->nodes[wc->active_node];
1465         splice_shared_node(node, dest);
1466         if (node->refs == 0) {
1467                 remove_cache_extent(&wc->shared, &node->cache);
1468                 free(node);
1469         }
1470         return 1;
1471 }
1472
1473 static int leave_shared_node(struct btrfs_root *root,
1474                              struct walk_control *wc, int level)
1475 {
1476         struct shared_node *node;
1477         struct shared_node *dest;
1478         int i;
1479
1480         if (level == wc->root_level)
1481                 return 0;
1482
1483         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1484                 if (wc->nodes[i])
1485                         break;
1486         }
1487         BUG_ON(i >= BTRFS_MAX_LEVEL);
1488
1489         node = wc->nodes[wc->active_node];
1490         wc->nodes[wc->active_node] = NULL;
1491         wc->active_node = i;
1492
1493         dest = wc->nodes[wc->active_node];
1494         if (wc->active_node < wc->root_level ||
1495             btrfs_root_refs(&root->root_item) > 0) {
1496                 BUG_ON(node->refs <= 1);
1497                 splice_shared_node(node, dest);
1498         } else {
1499                 BUG_ON(node->refs < 2);
1500                 node->refs--;
1501         }
1502         return 0;
1503 }
1504
1505 /*
1506  * Returns:
1507  * < 0 - on error
1508  * 1   - if the root with id child_root_id is a child of root parent_root_id
1509  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1510  *       has other root(s) as parent(s)
1511  * 2   - if the root child_root_id doesn't have any parent roots
1512  */
1513 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1514                          u64 child_root_id)
1515 {
1516         struct btrfs_path path;
1517         struct btrfs_key key;
1518         struct extent_buffer *leaf;
1519         int has_parent = 0;
1520         int ret;
1521
1522         btrfs_init_path(&path);
1523
1524         key.objectid = parent_root_id;
1525         key.type = BTRFS_ROOT_REF_KEY;
1526         key.offset = child_root_id;
1527         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1528                                 0, 0);
1529         if (ret < 0)
1530                 return ret;
1531         btrfs_release_path(&path);
1532         if (!ret)
1533                 return 1;
1534
1535         key.objectid = child_root_id;
1536         key.type = BTRFS_ROOT_BACKREF_KEY;
1537         key.offset = 0;
1538         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1539                                 0, 0);
1540         if (ret < 0)
1541                 goto out;
1542
1543         while (1) {
1544                 leaf = path.nodes[0];
1545                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1546                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1547                         if (ret)
1548                                 break;
1549                         leaf = path.nodes[0];
1550                 }
1551
1552                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1553                 if (key.objectid != child_root_id ||
1554                     key.type != BTRFS_ROOT_BACKREF_KEY)
1555                         break;
1556
1557                 has_parent = 1;
1558
1559                 if (key.offset == parent_root_id) {
1560                         btrfs_release_path(&path);
1561                         return 1;
1562                 }
1563
1564                 path.slots[0]++;
1565         }
1566 out:
1567         btrfs_release_path(&path);
1568         if (ret < 0)
1569                 return ret;
1570         return has_parent ? 0 : 2;
1571 }
1572
1573 static int process_dir_item(struct extent_buffer *eb,
1574                             int slot, struct btrfs_key *key,
1575                             struct shared_node *active_node)
1576 {
1577         u32 total;
1578         u32 cur = 0;
1579         u32 len;
1580         u32 name_len;
1581         u32 data_len;
1582         int error;
1583         int nritems = 0;
1584         u8 filetype;
1585         struct btrfs_dir_item *di;
1586         struct inode_record *rec;
1587         struct cache_tree *root_cache;
1588         struct cache_tree *inode_cache;
1589         struct btrfs_key location;
1590         char namebuf[BTRFS_NAME_LEN];
1591
1592         root_cache = &active_node->root_cache;
1593         inode_cache = &active_node->inode_cache;
1594         rec = active_node->current;
1595         rec->found_dir_item = 1;
1596
1597         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1598         total = btrfs_item_size_nr(eb, slot);
1599         while (cur < total) {
1600                 nritems++;
1601                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1602                 name_len = btrfs_dir_name_len(eb, di);
1603                 data_len = btrfs_dir_data_len(eb, di);
1604                 filetype = btrfs_dir_type(eb, di);
1605
1606                 rec->found_size += name_len;
1607                 if (cur + sizeof(*di) + name_len > total ||
1608                     name_len > BTRFS_NAME_LEN) {
1609                         error = REF_ERR_NAME_TOO_LONG;
1610
1611                         if (cur + sizeof(*di) > total)
1612                                 break;
1613                         len = min_t(u32, total - cur - sizeof(*di),
1614                                     BTRFS_NAME_LEN);
1615                 } else {
1616                         len = name_len;
1617                         error = 0;
1618                 }
1619
1620                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1621
1622                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1623                     key->offset != btrfs_name_hash(namebuf, len)) {
1624                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1625                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1626                         key->objectid, key->offset, namebuf, len, filetype,
1627                         key->offset, btrfs_name_hash(namebuf, len));
1628                 }
1629
1630                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1631                         add_inode_backref(inode_cache, location.objectid,
1632                                           key->objectid, key->offset, namebuf,
1633                                           len, filetype, key->type, error);
1634                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1635                         add_inode_backref(root_cache, location.objectid,
1636                                           key->objectid, key->offset,
1637                                           namebuf, len, filetype,
1638                                           key->type, error);
1639                 } else {
1640                         fprintf(stderr, "invalid location in dir item %u\n",
1641                                 location.type);
1642                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1643                                           key->objectid, key->offset, namebuf,
1644                                           len, filetype, key->type, error);
1645                 }
1646
1647                 len = sizeof(*di) + name_len + data_len;
1648                 di = (struct btrfs_dir_item *)((char *)di + len);
1649                 cur += len;
1650         }
1651         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1652                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1653
1654         return 0;
1655 }
1656
1657 static int process_inode_ref(struct extent_buffer *eb,
1658                              int slot, struct btrfs_key *key,
1659                              struct shared_node *active_node)
1660 {
1661         u32 total;
1662         u32 cur = 0;
1663         u32 len;
1664         u32 name_len;
1665         u64 index;
1666         int error;
1667         struct cache_tree *inode_cache;
1668         struct btrfs_inode_ref *ref;
1669         char namebuf[BTRFS_NAME_LEN];
1670
1671         inode_cache = &active_node->inode_cache;
1672
1673         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1674         total = btrfs_item_size_nr(eb, slot);
1675         while (cur < total) {
1676                 name_len = btrfs_inode_ref_name_len(eb, ref);
1677                 index = btrfs_inode_ref_index(eb, ref);
1678
1679                 /* inode_ref + namelen should not cross item boundary */
1680                 if (cur + sizeof(*ref) + name_len > total ||
1681                     name_len > BTRFS_NAME_LEN) {
1682                         if (total < cur + sizeof(*ref))
1683                                 break;
1684
1685                         /* Still try to read out the remaining part */
1686                         len = min_t(u32, total - cur - sizeof(*ref),
1687                                     BTRFS_NAME_LEN);
1688                         error = REF_ERR_NAME_TOO_LONG;
1689                 } else {
1690                         len = name_len;
1691                         error = 0;
1692                 }
1693
1694                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1695                 add_inode_backref(inode_cache, key->objectid, key->offset,
1696                                   index, namebuf, len, 0, key->type, error);
1697
1698                 len = sizeof(*ref) + name_len;
1699                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1700                 cur += len;
1701         }
1702         return 0;
1703 }
1704
1705 static int process_inode_extref(struct extent_buffer *eb,
1706                                 int slot, struct btrfs_key *key,
1707                                 struct shared_node *active_node)
1708 {
1709         u32 total;
1710         u32 cur = 0;
1711         u32 len;
1712         u32 name_len;
1713         u64 index;
1714         u64 parent;
1715         int error;
1716         struct cache_tree *inode_cache;
1717         struct btrfs_inode_extref *extref;
1718         char namebuf[BTRFS_NAME_LEN];
1719
1720         inode_cache = &active_node->inode_cache;
1721
1722         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1723         total = btrfs_item_size_nr(eb, slot);
1724         while (cur < total) {
1725                 name_len = btrfs_inode_extref_name_len(eb, extref);
1726                 index = btrfs_inode_extref_index(eb, extref);
1727                 parent = btrfs_inode_extref_parent(eb, extref);
1728                 if (name_len <= BTRFS_NAME_LEN) {
1729                         len = name_len;
1730                         error = 0;
1731                 } else {
1732                         len = BTRFS_NAME_LEN;
1733                         error = REF_ERR_NAME_TOO_LONG;
1734                 }
1735                 read_extent_buffer(eb, namebuf,
1736                                    (unsigned long)(extref + 1), len);
1737                 add_inode_backref(inode_cache, key->objectid, parent,
1738                                   index, namebuf, len, 0, key->type, error);
1739
1740                 len = sizeof(*extref) + name_len;
1741                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1742                 cur += len;
1743         }
1744         return 0;
1745
1746 }
1747
1748 static int count_csum_range(struct btrfs_root *root, u64 start,
1749                             u64 len, u64 *found)
1750 {
1751         struct btrfs_key key;
1752         struct btrfs_path path;
1753         struct extent_buffer *leaf;
1754         int ret;
1755         size_t size;
1756         *found = 0;
1757         u64 csum_end;
1758         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1759
1760         btrfs_init_path(&path);
1761
1762         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1763         key.offset = start;
1764         key.type = BTRFS_EXTENT_CSUM_KEY;
1765
1766         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1767                                 &key, &path, 0, 0);
1768         if (ret < 0)
1769                 goto out;
1770         if (ret > 0 && path.slots[0] > 0) {
1771                 leaf = path.nodes[0];
1772                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1773                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1774                     key.type == BTRFS_EXTENT_CSUM_KEY)
1775                         path.slots[0]--;
1776         }
1777
1778         while (len > 0) {
1779                 leaf = path.nodes[0];
1780                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1781                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1782                         if (ret > 0)
1783                                 break;
1784                         else if (ret < 0)
1785                                 goto out;
1786                         leaf = path.nodes[0];
1787                 }
1788
1789                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1790                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1791                     key.type != BTRFS_EXTENT_CSUM_KEY)
1792                         break;
1793
1794                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1795                 if (key.offset >= start + len)
1796                         break;
1797
1798                 if (key.offset > start)
1799                         start = key.offset;
1800
1801                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1802                 csum_end = key.offset + (size / csum_size) *
1803                            root->fs_info->sectorsize;
1804                 if (csum_end > start) {
1805                         size = min(csum_end - start, len);
1806                         len -= size;
1807                         start += size;
1808                         *found += size;
1809                 }
1810
1811                 path.slots[0]++;
1812         }
1813 out:
1814         btrfs_release_path(&path);
1815         if (ret < 0)
1816                 return ret;
1817         return 0;
1818 }
1819
1820 static int process_file_extent(struct btrfs_root *root,
1821                                 struct extent_buffer *eb,
1822                                 int slot, struct btrfs_key *key,
1823                                 struct shared_node *active_node)
1824 {
1825         struct inode_record *rec;
1826         struct btrfs_file_extent_item *fi;
1827         u64 num_bytes = 0;
1828         u64 disk_bytenr = 0;
1829         u64 extent_offset = 0;
1830         u64 mask = root->fs_info->sectorsize - 1;
1831         int extent_type;
1832         int ret;
1833
1834         rec = active_node->current;
1835         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1836         rec->found_file_extent = 1;
1837
1838         if (rec->extent_start == (u64)-1) {
1839                 rec->extent_start = key->offset;
1840                 rec->extent_end = key->offset;
1841         }
1842
1843         if (rec->extent_end > key->offset)
1844                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1845         else if (rec->extent_end < key->offset) {
1846                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1847                                            key->offset - rec->extent_end);
1848                 if (ret < 0)
1849                         return ret;
1850         }
1851
1852         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1853         extent_type = btrfs_file_extent_type(eb, fi);
1854
1855         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1856                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1857                 if (num_bytes == 0)
1858                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1859                 rec->found_size += num_bytes;
1860                 num_bytes = (num_bytes + mask) & ~mask;
1861         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1862                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1863                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1864                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1865                 extent_offset = btrfs_file_extent_offset(eb, fi);
1866                 if (num_bytes == 0 || (num_bytes & mask))
1867                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1868                 if (num_bytes + extent_offset >
1869                     btrfs_file_extent_ram_bytes(eb, fi))
1870                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1871                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1872                     (btrfs_file_extent_compression(eb, fi) ||
1873                      btrfs_file_extent_encryption(eb, fi) ||
1874                      btrfs_file_extent_other_encoding(eb, fi)))
1875                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1876                 if (disk_bytenr > 0)
1877                         rec->found_size += num_bytes;
1878         } else {
1879                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1880         }
1881         rec->extent_end = key->offset + num_bytes;
1882
1883         /*
1884          * The data reloc tree will copy full extents into its inode and then
1885          * copy the corresponding csums.  Because the extent it copied could be
1886          * a preallocated extent that hasn't been written to yet there may be no
1887          * csums to copy, ergo we won't have csums for our file extent.  This is
1888          * ok so just don't bother checking csums if the inode belongs to the
1889          * data reloc tree.
1890          */
1891         if (disk_bytenr > 0 &&
1892             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1893                 u64 found;
1894                 if (btrfs_file_extent_compression(eb, fi))
1895                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1896                 else
1897                         disk_bytenr += extent_offset;
1898
1899                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1900                 if (ret < 0)
1901                         return ret;
1902                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1903                         if (found > 0)
1904                                 rec->found_csum_item = 1;
1905                         if (found < num_bytes)
1906                                 rec->some_csum_missing = 1;
1907                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1908                         if (found > 0)
1909                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1910                 }
1911         }
1912         return 0;
1913 }
1914
1915 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1916                             struct walk_control *wc)
1917 {
1918         struct btrfs_key key;
1919         u32 nritems;
1920         int i;
1921         int ret = 0;
1922         struct cache_tree *inode_cache;
1923         struct shared_node *active_node;
1924
1925         if (wc->root_level == wc->active_node &&
1926             btrfs_root_refs(&root->root_item) == 0)
1927                 return 0;
1928
1929         active_node = wc->nodes[wc->active_node];
1930         inode_cache = &active_node->inode_cache;
1931         nritems = btrfs_header_nritems(eb);
1932         for (i = 0; i < nritems; i++) {
1933                 btrfs_item_key_to_cpu(eb, &key, i);
1934
1935                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1936                         continue;
1937                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1938                         continue;
1939
1940                 if (active_node->current == NULL ||
1941                     active_node->current->ino < key.objectid) {
1942                         if (active_node->current) {
1943                                 active_node->current->checked = 1;
1944                                 maybe_free_inode_rec(inode_cache,
1945                                                      active_node->current);
1946                         }
1947                         active_node->current = get_inode_rec(inode_cache,
1948                                                              key.objectid, 1);
1949                         BUG_ON(IS_ERR(active_node->current));
1950                 }
1951                 switch (key.type) {
1952                 case BTRFS_DIR_ITEM_KEY:
1953                 case BTRFS_DIR_INDEX_KEY:
1954                         ret = process_dir_item(eb, i, &key, active_node);
1955                         break;
1956                 case BTRFS_INODE_REF_KEY:
1957                         ret = process_inode_ref(eb, i, &key, active_node);
1958                         break;
1959                 case BTRFS_INODE_EXTREF_KEY:
1960                         ret = process_inode_extref(eb, i, &key, active_node);
1961                         break;
1962                 case BTRFS_INODE_ITEM_KEY:
1963                         ret = process_inode_item(eb, i, &key, active_node);
1964                         break;
1965                 case BTRFS_EXTENT_DATA_KEY:
1966                         ret = process_file_extent(root, eb, i, &key,
1967                                                   active_node);
1968                         break;
1969                 default:
1970                         break;
1971                 };
1972         }
1973         return ret;
1974 }
1975
/*
 * Per-level cache of the tree block most recently looked up while walking a
 * tree, indexed by tree level:
 *
 * bytenr[l]     - start bytenr of the block last looked up at level l
 * refs[l]       - reference count of that block as returned by
 *                 btrfs_lookup_extent_info()
 * need_check[l] - filled in by update_nodes_refs(): 1 if the block has at
 *                 most one reference or need_check() picked the current
 *                 root to check it, 0 if the block can be skipped
 */
1976 struct node_refs {
1977         u64 bytenr[BTRFS_MAX_LEVEL];
1978         u64 refs[BTRFS_MAX_LEVEL];
1979         int need_check[BTRFS_MAX_LEVEL];
1980 };
1981
1982 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1983                              struct node_refs *nrefs, u64 level);
1984 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1985                             unsigned int ext_ref);
1986
1987 /*
1988  * Returns >0  Found error, not fatal, should continue
1989  * Returns <0  Fatal error, must exit the whole check
1990  * Returns 0   No errors found
1991  */
1992 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1993                                struct node_refs *nrefs, int *level, int ext_ref)
1994 {
1995         struct extent_buffer *cur = path->nodes[0];
1996         struct btrfs_key key;
1997         u64 cur_bytenr;
1998         u32 nritems;
1999         u64 first_ino = 0;
2000         int root_level = btrfs_header_level(root->node);
2001         int i;
2002         int ret = 0; /* Final return value */
2003         int err = 0; /* Positive error bitmap */
2004
2005         cur_bytenr = cur->start;
2006
2007         /* skip to first inode item or the first inode number change */
2008         nritems = btrfs_header_nritems(cur);
2009         for (i = 0; i < nritems; i++) {
2010                 btrfs_item_key_to_cpu(cur, &key, i);
2011                 if (i == 0)
2012                         first_ino = key.objectid;
2013                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2014                     (first_ino && first_ino != key.objectid))
2015                         break;
2016         }
2017         if (i == nritems) {
2018                 path->slots[0] = nritems;
2019                 return 0;
2020         }
2021         path->slots[0] = i;
2022
2023 again:
2024         err |= check_inode_item(root, path, ext_ref);
2025
2026         if (err & LAST_ITEM)
2027                 goto out;
2028
2029         /* still have inode items in thie leaf */
2030         if (cur->start == cur_bytenr)
2031                 goto again;
2032
2033         /*
2034          * we have switched to another leaf, above nodes may
2035          * have changed, here walk down the path, if a node
2036          * or leaf is shared, check whether we can skip this
2037          * node or leaf.
2038          */
2039         for (i = root_level; i >= 0; i--) {
2040                 if (path->nodes[i]->start == nrefs->bytenr[i])
2041                         continue;
2042
2043                 ret = update_nodes_refs(root,
2044                                 path->nodes[i]->start,
2045                                 nrefs, i);
2046                 if (ret)
2047                         goto out;
2048
2049                 if (!nrefs->need_check[i]) {
2050                         *level += 1;
2051                         break;
2052                 }
2053         }
2054
2055         for (i = 0; i < *level; i++) {
2056                 free_extent_buffer(path->nodes[i]);
2057                 path->nodes[i] = NULL;
2058         }
2059 out:
2060         err &= ~LAST_ITEM;
2061         if (err && !ret)
2062                 ret = err;
2063         return ret;
2064 }
2065
2066 static void reada_walk_down(struct btrfs_root *root,
2067                             struct extent_buffer *node, int slot)
2068 {
2069         struct btrfs_fs_info *fs_info = root->fs_info;
2070         u64 bytenr;
2071         u64 ptr_gen;
2072         u32 nritems;
2073         int i;
2074         int level;
2075
2076         level = btrfs_header_level(node);
2077         if (level != 1)
2078                 return;
2079
2080         nritems = btrfs_header_nritems(node);
2081         for (i = slot; i < nritems; i++) {
2082                 bytenr = btrfs_node_blockptr(node, i);
2083                 ptr_gen = btrfs_node_ptr_generation(node, i);
2084                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2085         }
2086 }
2087
2088 /*
2089  * Check the child node/leaf by the following condition:
2090  * 1. the first item key of the node/leaf should be the same with the one
2091  *    in parent.
2092  * 2. block in parent node should match the child node/leaf.
2093  * 3. generation of parent node and child's header should be consistent.
2094  *
2095  * Or the child node/leaf pointed by the key in parent is not valid.
2096  *
2097  * We hope to check leaf owner too, but since subvol may share leaves,
2098  * which makes leaf owner check not so strong, key check should be
2099  * sufficient enough for that case.
2100  */
2101 static int check_child_node(struct extent_buffer *parent, int slot,
2102                             struct extent_buffer *child)
2103 {
2104         struct btrfs_key parent_key;
2105         struct btrfs_key child_key;
2106         int ret = 0;
2107
2108         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2109         if (btrfs_header_level(child) == 0)
2110                 btrfs_item_key_to_cpu(child, &child_key, 0);
2111         else
2112                 btrfs_node_key_to_cpu(child, &child_key, 0);
2113
2114         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2115                 ret = -EINVAL;
2116                 fprintf(stderr,
2117                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2118                         parent_key.objectid, parent_key.type, parent_key.offset,
2119                         child_key.objectid, child_key.type, child_key.offset);
2120         }
2121         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2122                 ret = -EINVAL;
2123                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2124                         btrfs_node_blockptr(parent, slot),
2125                         btrfs_header_bytenr(child));
2126         }
2127         if (btrfs_node_ptr_generation(parent, slot) !=
2128             btrfs_header_generation(child)) {
2129                 ret = -EINVAL;
2130                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2131                         btrfs_header_generation(child),
2132                         btrfs_node_ptr_generation(parent, slot));
2133         }
2134         return ret;
2135 }
2136
2137 /*
2138  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2139  * in every fs or file tree check. Here we find its all root ids, and only check
2140  * it in the fs or file tree which has the smallest root id.
2141  */
2142 static int need_check(struct btrfs_root *root, struct ulist *roots)
2143 {
2144         struct rb_node *node;
2145         struct ulist_node *u;
2146
2147         if (roots->nnodes == 1)
2148                 return 1;
2149
2150         node = rb_first(&roots->root);
2151         u = rb_entry(node, struct ulist_node, rb_node);
2152         /*
2153          * current root id is not smallest, we skip it and let it be checked
2154          * in the fs or file tree who hash the smallest root id.
2155          */
2156         if (root->objectid != u->val)
2157                 return 0;
2158
2159         return 1;
2160 }
2161
2162 /*
2163  * for a tree node or leaf, we record its reference count, so later if we still
2164  * process this node or leaf, don't need to compute its reference count again.
2165  */
2166 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2167                              struct node_refs *nrefs, u64 level)
2168 {
2169         int check, ret;
2170         u64 refs;
2171         struct ulist *roots;
2172
2173         if (nrefs->bytenr[level] != bytenr) {
2174                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2175                                        level, 1, &refs, NULL);
2176                 if (ret < 0)
2177                         return ret;
2178
2179                 nrefs->bytenr[level] = bytenr;
2180                 nrefs->refs[level] = refs;
2181                 if (refs > 1) {
2182                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2183                                                    0, &roots);
2184                         if (ret)
2185                                 return -EIO;
2186
2187                         check = need_check(root, roots);
2188                         ulist_free(roots);
2189                         nrefs->need_check[level] = check;
2190                 } else {
2191                         nrefs->need_check[level] = 1;
2192                 }
2193         }
2194
2195         return 0;
2196 }
2197
2198 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2199                           struct walk_control *wc, int *level,
2200                           struct node_refs *nrefs)
2201 {
2202         enum btrfs_tree_block_status status;
2203         u64 bytenr;
2204         u64 ptr_gen;
2205         struct btrfs_fs_info *fs_info = root->fs_info;
2206         struct extent_buffer *next;
2207         struct extent_buffer *cur;
2208         int ret, err = 0;
2209         u64 refs;
2210
2211         WARN_ON(*level < 0);
2212         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2213
2214         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2215                 refs = nrefs->refs[*level];
2216                 ret = 0;
2217         } else {
2218                 ret = btrfs_lookup_extent_info(NULL, root,
2219                                        path->nodes[*level]->start,
2220                                        *level, 1, &refs, NULL);
2221                 if (ret < 0) {
2222                         err = ret;
2223                         goto out;
2224                 }
2225                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2226                 nrefs->refs[*level] = refs;
2227         }
2228
2229         if (refs > 1) {
2230                 ret = enter_shared_node(root, path->nodes[*level]->start,
2231                                         refs, wc, *level);
2232                 if (ret > 0) {
2233                         err = ret;
2234                         goto out;
2235                 }
2236         }
2237
2238         while (*level >= 0) {
2239                 WARN_ON(*level < 0);
2240                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2241                 cur = path->nodes[*level];
2242
2243                 if (btrfs_header_level(cur) != *level)
2244                         WARN_ON(1);
2245
2246                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2247                         break;
2248                 if (*level == 0) {
2249                         ret = process_one_leaf(root, cur, wc);
2250                         if (ret < 0)
2251                                 err = ret;
2252                         break;
2253                 }
2254                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2255                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2256
2257                 if (bytenr == nrefs->bytenr[*level - 1]) {
2258                         refs = nrefs->refs[*level - 1];
2259                 } else {
2260                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2261                                         *level - 1, 1, &refs, NULL);
2262                         if (ret < 0) {
2263                                 refs = 0;
2264                         } else {
2265                                 nrefs->bytenr[*level - 1] = bytenr;
2266                                 nrefs->refs[*level - 1] = refs;
2267                         }
2268                 }
2269
2270                 if (refs > 1) {
2271                         ret = enter_shared_node(root, bytenr, refs,
2272                                                 wc, *level - 1);
2273                         if (ret > 0) {
2274                                 path->slots[*level]++;
2275                                 continue;
2276                         }
2277                 }
2278
2279                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2280                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281                         free_extent_buffer(next);
2282                         reada_walk_down(root, cur, path->slots[*level]);
2283                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2284                         if (!extent_buffer_uptodate(next)) {
2285                                 struct btrfs_key node_key;
2286
2287                                 btrfs_node_key_to_cpu(path->nodes[*level],
2288                                                       &node_key,
2289                                                       path->slots[*level]);
2290                                 btrfs_add_corrupt_extent_record(root->fs_info,
2291                                                 &node_key,
2292                                                 path->nodes[*level]->start,
2293                                                 root->fs_info->nodesize,
2294                                                 *level);
2295                                 err = -EIO;
2296                                 goto out;
2297                         }
2298                 }
2299
2300                 ret = check_child_node(cur, path->slots[*level], next);
2301                 if (ret) {
2302                         free_extent_buffer(next);
2303                         err = ret;
2304                         goto out;
2305                 }
2306
2307                 if (btrfs_is_leaf(next))
2308                         status = btrfs_check_leaf(root, NULL, next);
2309                 else
2310                         status = btrfs_check_node(root, NULL, next);
2311                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2312                         free_extent_buffer(next);
2313                         err = -EIO;
2314                         goto out;
2315                 }
2316
2317                 *level = *level - 1;
2318                 free_extent_buffer(path->nodes[*level]);
2319                 path->nodes[*level] = next;
2320                 path->slots[*level] = 0;
2321         }
2322 out:
2323         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2324         return err;
2325 }
2326
2327 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2328                             unsigned int ext_ref);
2329
2330 /*
2331  * Returns >0  Found error, should continue
2332  * Returns <0  Fatal error, must exit the whole check
2333  * Returns 0   No errors found
2334  */
2335 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2336                              int *level, struct node_refs *nrefs, int ext_ref)
2337 {
2338         enum btrfs_tree_block_status status;
2339         u64 bytenr;
2340         u64 ptr_gen;
2341         struct btrfs_fs_info *fs_info = root->fs_info;
2342         struct extent_buffer *next;
2343         struct extent_buffer *cur;
2344         int ret;
2345
2346         WARN_ON(*level < 0);
2347         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2348
2349         ret = update_nodes_refs(root, path->nodes[*level]->start,
2350                                 nrefs, *level);
2351         if (ret < 0)
2352                 return ret;
2353
2354         while (*level >= 0) {
2355                 WARN_ON(*level < 0);
2356                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2357                 cur = path->nodes[*level];
2358
2359                 if (btrfs_header_level(cur) != *level)
2360                         WARN_ON(1);
2361
2362                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2363                         break;
2364                 /* Don't forgot to check leaf/node validation */
2365                 if (*level == 0) {
2366                         ret = btrfs_check_leaf(root, NULL, cur);
2367                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2368                                 ret = -EIO;
2369                                 break;
2370                         }
2371                         ret = process_one_leaf_v2(root, path, nrefs,
2372                                                   level, ext_ref);
2373                         break;
2374                 } else {
2375                         ret = btrfs_check_node(root, NULL, cur);
2376                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2377                                 ret = -EIO;
2378                                 break;
2379                         }
2380                 }
2381                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2382                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2383
2384                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2385                 if (ret)
2386                         break;
2387                 if (!nrefs->need_check[*level - 1]) {
2388                         path->slots[*level]++;
2389                         continue;
2390                 }
2391
2392                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2393                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2394                         free_extent_buffer(next);
2395                         reada_walk_down(root, cur, path->slots[*level]);
2396                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2397                         if (!extent_buffer_uptodate(next)) {
2398                                 struct btrfs_key node_key;
2399
2400                                 btrfs_node_key_to_cpu(path->nodes[*level],
2401                                                       &node_key,
2402                                                       path->slots[*level]);
2403                                 btrfs_add_corrupt_extent_record(fs_info,
2404                                                 &node_key,
2405                                                 path->nodes[*level]->start,
2406                                                 fs_info->nodesize,
2407                                                 *level);
2408                                 ret = -EIO;
2409                                 break;
2410                         }
2411                 }
2412
2413                 ret = check_child_node(cur, path->slots[*level], next);
2414                 if (ret < 0) 
2415                         break;
2416
2417                 if (btrfs_is_leaf(next))
2418                         status = btrfs_check_leaf(root, NULL, next);
2419                 else
2420                         status = btrfs_check_node(root, NULL, next);
2421                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2422                         free_extent_buffer(next);
2423                         ret = -EIO;
2424                         break;
2425                 }
2426
2427                 *level = *level - 1;
2428                 free_extent_buffer(path->nodes[*level]);
2429                 path->nodes[*level] = next;
2430                 path->slots[*level] = 0;
2431         }
2432         return ret;
2433 }
2434
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436                         struct walk_control *wc, int *level)
2437 {
2438         int i;
2439         struct extent_buffer *leaf;
2440
2441         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2442                 leaf = path->nodes[i];
2443                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2444                         path->slots[i]++;
2445                         *level = i;
2446                         return 0;
2447                 } else {
2448                         free_extent_buffer(path->nodes[*level]);
2449                         path->nodes[*level] = NULL;
2450                         BUG_ON(*level > wc->active_node);
2451                         if (*level == wc->active_node)
2452                                 leave_shared_node(root, wc, *level);
2453                         *level = i + 1;
2454                 }
2455         }
2456         return 1;
2457 }
2458
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2460                            int *level)
2461 {
2462         int i;
2463         struct extent_buffer *leaf;
2464
2465         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2466                 leaf = path->nodes[i];
2467                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2468                         path->slots[i]++;
2469                         *level = i;
2470                         return 0;
2471                 } else {
2472                         free_extent_buffer(path->nodes[*level]);
2473                         path->nodes[*level] = NULL;
2474                         *level = i + 1;
2475                 }
2476         }
2477         return 1;
2478 }
2479
2480 static int check_root_dir(struct inode_record *rec)
2481 {
2482         struct inode_backref *backref;
2483         int ret = -1;
2484
2485         if (!rec->found_inode_item || rec->errors)
2486                 goto out;
2487         if (rec->nlink != 1 || rec->found_link != 0)
2488                 goto out;
2489         if (list_empty(&rec->backrefs))
2490                 goto out;
2491         backref = to_inode_backref(rec->backrefs.next);
2492         if (!backref->found_inode_ref)
2493                 goto out;
2494         if (backref->index != 0 || backref->namelen != 2 ||
2495             memcmp(backref->name, "..", 2))
2496                 goto out;
2497         if (backref->found_dir_index || backref->found_dir_item)
2498                 goto out;
2499         ret = 0;
2500 out:
2501         return ret;
2502 }
2503
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505                               struct btrfs_root *root, struct btrfs_path *path,
2506                               struct inode_record *rec)
2507 {
2508         struct btrfs_inode_item *ei;
2509         struct btrfs_key key;
2510         int ret;
2511
2512         key.objectid = rec->ino;
2513         key.type = BTRFS_INODE_ITEM_KEY;
2514         key.offset = (u64)-1;
2515
2516         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2517         if (ret < 0)
2518                 goto out;
2519         if (ret) {
2520                 if (!path->slots[0]) {
2521                         ret = -ENOENT;
2522                         goto out;
2523                 }
2524                 path->slots[0]--;
2525                 ret = 0;
2526         }
2527         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2528         if (key.objectid != rec->ino) {
2529                 ret = -ENOENT;
2530                 goto out;
2531         }
2532
2533         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534                             struct btrfs_inode_item);
2535         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536         btrfs_mark_buffer_dirty(path->nodes[0]);
2537         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2539                root->root_key.objectid);
2540 out:
2541         btrfs_release_path(path);
2542         return ret;
2543 }
2544
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546                                     struct btrfs_root *root,
2547                                     struct btrfs_path *path,
2548                                     struct inode_record *rec)
2549 {
2550         int ret;
2551
2552         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553         btrfs_release_path(path);
2554         if (!ret)
2555                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2556         return ret;
2557 }
2558
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560                                struct btrfs_root *root,
2561                                struct btrfs_path *path,
2562                                struct inode_record *rec)
2563 {
2564         struct btrfs_inode_item *ei;
2565         struct btrfs_key key;
2566         int ret = 0;
2567
2568         key.objectid = rec->ino;
2569         key.type = BTRFS_INODE_ITEM_KEY;
2570         key.offset = 0;
2571
2572         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2573         if (ret) {
2574                 if (ret > 0)
2575                         ret = -ENOENT;
2576                 goto out;
2577         }
2578
2579         /* Since ret == 0, no need to check anything */
2580         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581                             struct btrfs_inode_item);
2582         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583         btrfs_mark_buffer_dirty(path->nodes[0]);
2584         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585         printf("reset nbytes for ino %llu root %llu\n",
2586                rec->ino, root->root_key.objectid);
2587 out:
2588         btrfs_release_path(path);
2589         return ret;
2590 }
2591
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593                                  struct cache_tree *inode_cache,
2594                                  struct inode_record *rec,
2595                                  struct inode_backref *backref)
2596 {
2597         struct btrfs_path path;
2598         struct btrfs_trans_handle *trans;
2599         struct btrfs_dir_item *dir_item;
2600         struct extent_buffer *leaf;
2601         struct btrfs_key key;
2602         struct btrfs_disk_key disk_key;
2603         struct inode_record *dir_rec;
2604         unsigned long name_ptr;
2605         u32 data_size = sizeof(*dir_item) + backref->namelen;
2606         int ret;
2607
2608         trans = btrfs_start_transaction(root, 1);
2609         if (IS_ERR(trans))
2610                 return PTR_ERR(trans);
2611
2612         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613                 (unsigned long long)rec->ino);
2614
2615         btrfs_init_path(&path);
2616         key.objectid = backref->dir;
2617         key.type = BTRFS_DIR_INDEX_KEY;
2618         key.offset = backref->index;
2619         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2620         BUG_ON(ret);
2621
2622         leaf = path.nodes[0];
2623         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2624
2625         disk_key.objectid = cpu_to_le64(rec->ino);
2626         disk_key.type = BTRFS_INODE_ITEM_KEY;
2627         disk_key.offset = 0;
2628
2629         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631         btrfs_set_dir_data_len(leaf, dir_item, 0);
2632         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2633         name_ptr = (unsigned long)(dir_item + 1);
2634         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635         btrfs_mark_buffer_dirty(leaf);
2636         btrfs_release_path(&path);
2637         btrfs_commit_transaction(trans, root);
2638
2639         backref->found_dir_index = 1;
2640         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641         BUG_ON(IS_ERR(dir_rec));
2642         if (!dir_rec)
2643                 return 0;
2644         dir_rec->found_size += backref->namelen;
2645         if (dir_rec->found_size == dir_rec->isize &&
2646             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648         if (dir_rec->found_size != dir_rec->isize)
2649                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2650
2651         return 0;
2652 }
2653
2654 static int delete_dir_index(struct btrfs_root *root,
2655                             struct inode_backref *backref)
2656 {
2657         struct btrfs_trans_handle *trans;
2658         struct btrfs_dir_item *di;
2659         struct btrfs_path path;
2660         int ret = 0;
2661
2662         trans = btrfs_start_transaction(root, 1);
2663         if (IS_ERR(trans))
2664                 return PTR_ERR(trans);
2665
2666         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667                 (unsigned long long)backref->dir,
2668                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669                 (unsigned long long)root->objectid);
2670
2671         btrfs_init_path(&path);
2672         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673                                     backref->name, backref->namelen,
2674                                     backref->index, -1);
2675         if (IS_ERR(di)) {
2676                 ret = PTR_ERR(di);
2677                 btrfs_release_path(&path);
2678                 btrfs_commit_transaction(trans, root);
2679                 if (ret == -ENOENT)
2680                         return 0;
2681                 return ret;
2682         }
2683
2684         if (!di)
2685                 ret = btrfs_del_item(trans, root, &path);
2686         else
2687                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2688         BUG_ON(ret);
2689         btrfs_release_path(&path);
2690         btrfs_commit_transaction(trans, root);
2691         return ret;
2692 }
2693
2694 static int create_inode_item(struct btrfs_root *root,
2695                              struct inode_record *rec,
2696                              int root_dir)
2697 {
2698         struct btrfs_trans_handle *trans;
2699         struct btrfs_inode_item inode_item;
2700         time_t now = time(NULL);
2701         int ret;
2702
2703         trans = btrfs_start_transaction(root, 1);
2704         if (IS_ERR(trans)) {
2705                 ret = PTR_ERR(trans);
2706                 return ret;
2707         }
2708
2709         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2710                 "be incomplete, please check permissions and content after "
2711                 "the fsck completes.\n", (unsigned long long)root->objectid,
2712                 (unsigned long long)rec->ino);
2713
2714         memset(&inode_item, 0, sizeof(inode_item));
2715         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2716         if (root_dir)
2717                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2718         else
2719                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2720         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2721         if (rec->found_dir_item) {
2722                 if (rec->found_file_extent)
2723                         fprintf(stderr, "root %llu inode %llu has both a dir "
2724                                 "item and extents, unsure if it is a dir or a "
2725                                 "regular file so setting it as a directory\n",
2726                                 (unsigned long long)root->objectid,
2727                                 (unsigned long long)rec->ino);
2728                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2729                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2730         } else if (!rec->found_dir_item) {
2731                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2732                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2733         }
2734         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2735         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2736         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2737         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2738         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2739         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2740         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2741         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2742
2743         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2744         BUG_ON(ret);
2745         btrfs_commit_transaction(trans, root);
2746         return 0;
2747 }
2748
2749 static int repair_inode_backrefs(struct btrfs_root *root,
2750                                  struct inode_record *rec,
2751                                  struct cache_tree *inode_cache,
2752                                  int delete)
2753 {
2754         struct inode_backref *tmp, *backref;
2755         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2756         int ret = 0;
2757         int repaired = 0;
2758
2759         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2760                 if (!delete && rec->ino == root_dirid) {
2761                         if (!rec->found_inode_item) {
2762                                 ret = create_inode_item(root, rec, 1);
2763                                 if (ret)
2764                                         break;
2765                                 repaired++;
2766                         }
2767                 }
2768
2769                 /* Index 0 for root dir's are special, don't mess with it */
2770                 if (rec->ino == root_dirid && backref->index == 0)
2771                         continue;
2772
2773                 if (delete &&
2774                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2775                      (backref->found_dir_index && backref->found_inode_ref &&
2776                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2777                         ret = delete_dir_index(root, backref);
2778                         if (ret)
2779                                 break;
2780                         repaired++;
2781                         list_del(&backref->list);
2782                         free(backref);
2783                         continue;
2784                 }
2785
2786                 if (!delete && !backref->found_dir_index &&
2787                     backref->found_dir_item && backref->found_inode_ref) {
2788                         ret = add_missing_dir_index(root, inode_cache, rec,
2789                                                     backref);
2790                         if (ret)
2791                                 break;
2792                         repaired++;
2793                         if (backref->found_dir_item &&
2794                             backref->found_dir_index) {
2795                                 if (!backref->errors &&
2796                                     backref->found_inode_ref) {
2797                                         list_del(&backref->list);
2798                                         free(backref);
2799                                         continue;
2800                                 }
2801                         }
2802                 }
2803
2804                 if (!delete && (!backref->found_dir_index &&
2805                                 !backref->found_dir_item &&
2806                                 backref->found_inode_ref)) {
2807                         struct btrfs_trans_handle *trans;
2808                         struct btrfs_key location;
2809
2810                         ret = check_dir_conflict(root, backref->name,
2811                                                  backref->namelen,
2812                                                  backref->dir,
2813                                                  backref->index);
2814                         if (ret) {
2815                                 /*
2816                                  * let nlink fixing routine to handle it,
2817                                  * which can do it better.
2818                                  */
2819                                 ret = 0;
2820                                 break;
2821                         }
2822                         location.objectid = rec->ino;
2823                         location.type = BTRFS_INODE_ITEM_KEY;
2824                         location.offset = 0;
2825
2826                         trans = btrfs_start_transaction(root, 1);
2827                         if (IS_ERR(trans)) {
2828                                 ret = PTR_ERR(trans);
2829                                 break;
2830                         }
2831                         fprintf(stderr, "adding missing dir index/item pair "
2832                                 "for inode %llu\n",
2833                                 (unsigned long long)rec->ino);
2834                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2835                                                     backref->namelen,
2836                                                     backref->dir, &location,
2837                                                     imode_to_type(rec->imode),
2838                                                     backref->index);
2839                         BUG_ON(ret);
2840                         btrfs_commit_transaction(trans, root);
2841                         repaired++;
2842                 }
2843
2844                 if (!delete && (backref->found_inode_ref &&
2845                                 backref->found_dir_index &&
2846                                 backref->found_dir_item &&
2847                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2848                                 !rec->found_inode_item)) {
2849                         ret = create_inode_item(root, rec, 0);
2850                         if (ret)
2851                                 break;
2852                         repaired++;
2853                 }
2854
2855         }
2856         return ret ? ret : repaired;
2857 }
2858
2859 /*
2860  * To determine the file type for nlink/inode_item repair
2861  *
2862  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2863  * Return -ENOENT if file type is not found.
2864  */
2865 static int find_file_type(struct inode_record *rec, u8 *type)
2866 {
2867         struct inode_backref *backref;
2868
2869         /* For inode item recovered case */
2870         if (rec->found_inode_item) {
2871                 *type = imode_to_type(rec->imode);
2872                 return 0;
2873         }
2874
2875         list_for_each_entry(backref, &rec->backrefs, list) {
2876                 if (backref->found_dir_index || backref->found_dir_item) {
2877                         *type = backref->filetype;
2878                         return 0;
2879                 }
2880         }
2881         return -ENOENT;
2882 }
2883
2884 /*
2885  * To determine the file name for nlink repair
2886  *
2887  * Return 0 if file name is found, set name and namelen.
2888  * Return -ENOENT if file name is not found.
2889  */
2890 static int find_file_name(struct inode_record *rec,
2891                           char *name, int *namelen)
2892 {
2893         struct inode_backref *backref;
2894
2895         list_for_each_entry(backref, &rec->backrefs, list) {
2896                 if (backref->found_dir_index || backref->found_dir_item ||
2897                     backref->found_inode_ref) {
2898                         memcpy(name, backref->name, backref->namelen);
2899                         *namelen = backref->namelen;
2900                         return 0;
2901                 }
2902         }
2903         return -ENOENT;
2904 }
2905
2906 /* Reset the nlink of the inode to the correct one */
2907 static int reset_nlink(struct btrfs_trans_handle *trans,
2908                        struct btrfs_root *root,
2909                        struct btrfs_path *path,
2910                        struct inode_record *rec)
2911 {
2912         struct inode_backref *backref;
2913         struct inode_backref *tmp;
2914         struct btrfs_key key;
2915         struct btrfs_inode_item *inode_item;
2916         int ret = 0;
2917
2918         /* We don't believe this either, reset it and iterate backref */
2919         rec->found_link = 0;
2920
2921         /* Remove all backref including the valid ones */
2922         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2923                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2924                                    backref->index, backref->name,
2925                                    backref->namelen, 0);
2926                 if (ret < 0)
2927                         goto out;
2928
2929                 /* remove invalid backref, so it won't be added back */
2930                 if (!(backref->found_dir_index &&
2931                       backref->found_dir_item &&
2932                       backref->found_inode_ref)) {
2933                         list_del(&backref->list);
2934                         free(backref);
2935                 } else {
2936                         rec->found_link++;
2937                 }
2938         }
2939
2940         /* Set nlink to 0 */
2941         key.objectid = rec->ino;
2942         key.type = BTRFS_INODE_ITEM_KEY;
2943         key.offset = 0;
2944         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2945         if (ret < 0)
2946                 goto out;
2947         if (ret > 0) {
2948                 ret = -ENOENT;
2949                 goto out;
2950         }
2951         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2952                                     struct btrfs_inode_item);
2953         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2954         btrfs_mark_buffer_dirty(path->nodes[0]);
2955         btrfs_release_path(path);
2956
2957         /*
2958          * Add back valid inode_ref/dir_item/dir_index,
2959          * add_link() will handle the nlink inc, so new nlink must be correct
2960          */
2961         list_for_each_entry(backref, &rec->backrefs, list) {
2962                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2963                                      backref->name, backref->namelen,
2964                                      backref->filetype, &backref->index, 1);
2965                 if (ret < 0)
2966                         goto out;
2967         }
2968 out:
2969         btrfs_release_path(path);
2970         return ret;
2971 }
2972
2973 static int get_highest_inode(struct btrfs_trans_handle *trans,
2974                                 struct btrfs_root *root,
2975                                 struct btrfs_path *path,
2976                                 u64 *highest_ino)
2977 {
2978         struct btrfs_key key, found_key;
2979         int ret;
2980
2981         btrfs_init_path(path);
2982         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2983         key.offset = -1;
2984         key.type = BTRFS_INODE_ITEM_KEY;
2985         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2986         if (ret == 1) {
2987                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2988                                 path->slots[0] - 1);
2989                 *highest_ino = found_key.objectid;
2990                 ret = 0;
2991         }
2992         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2993                 ret = -EOVERFLOW;
2994         btrfs_release_path(path);
2995         return ret;
2996 }
2997
2998 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2999                                struct btrfs_root *root,
3000                                struct btrfs_path *path,
3001                                struct inode_record *rec)
3002 {
3003         char *dir_name = "lost+found";
3004         char namebuf[BTRFS_NAME_LEN] = {0};
3005         u64 lost_found_ino;
3006         u32 mode = 0700;
3007         u8 type = 0;
3008         int namelen = 0;
3009         int name_recovered = 0;
3010         int type_recovered = 0;
3011         int ret = 0;
3012
3013         /*
3014          * Get file name and type first before these invalid inode ref
3015          * are deleted by remove_all_invalid_backref()
3016          */
3017         name_recovered = !find_file_name(rec, namebuf, &namelen);
3018         type_recovered = !find_file_type(rec, &type);
3019
3020         if (!name_recovered) {
3021                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3022                        rec->ino, rec->ino);
3023                 namelen = count_digits(rec->ino);
3024                 sprintf(namebuf, "%llu", rec->ino);
3025                 name_recovered = 1;
3026         }
3027         if (!type_recovered) {
3028                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3029                        rec->ino);
3030                 type = BTRFS_FT_REG_FILE;
3031                 type_recovered = 1;
3032         }
3033
3034         ret = reset_nlink(trans, root, path, rec);
3035         if (ret < 0) {
3036                 fprintf(stderr,
3037                         "Failed to reset nlink for inode %llu: %s\n",
3038                         rec->ino, strerror(-ret));
3039                 goto out;
3040         }
3041
3042         if (rec->found_link == 0) {
3043                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3044                 if (ret < 0)
3045                         goto out;
3046                 lost_found_ino++;
3047                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3048                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3049                                   mode);
3050                 if (ret < 0) {
3051                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
3052                                 dir_name, strerror(-ret));
3053                         goto out;
3054                 }
3055                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3056                                      namebuf, namelen, type, NULL, 1);
3057                 /*
3058                  * Add ".INO" suffix several times to handle case where
3059                  * "FILENAME.INO" is already taken by another file.
3060                  */
3061                 while (ret == -EEXIST) {
3062                         /*
3063                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
3064                          */
3065                         if (namelen + count_digits(rec->ino) + 1 >
3066                             BTRFS_NAME_LEN) {
3067                                 ret = -EFBIG;
3068                                 goto out;
3069                         }
3070                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3071                                  ".%llu", rec->ino);
3072                         namelen += count_digits(rec->ino) + 1;
3073                         ret = btrfs_add_link(trans, root, rec->ino,
3074                                              lost_found_ino, namebuf,
3075                                              namelen, type, NULL, 1);
3076                 }
3077                 if (ret < 0) {
3078                         fprintf(stderr,
3079                                 "Failed to link the inode %llu to %s dir: %s\n",
3080                                 rec->ino, dir_name, strerror(-ret));
3081                         goto out;
3082                 }
3083                 /*
3084                  * Just increase the found_link, don't actually add the
3085                  * backref. This will make things easier and this inode
3086                  * record will be freed after the repair is done.
3087                  * So fsck will not report problem about this inode.
3088                  */
3089                 rec->found_link++;
3090                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3091                        namelen, namebuf, dir_name);
3092         }
3093         printf("Fixed the nlink of inode %llu\n", rec->ino);
3094 out:
3095         /*
3096          * Clear the flag anyway, or we will loop forever for the same inode
3097          * as it will not be removed from the bad inode list and the dead loop
3098          * happens.
3099          */
3100         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3101         btrfs_release_path(path);
3102         return ret;
3103 }
3104
3105 /*
3106  * Check if there is any normal(reg or prealloc) file extent for given
3107  * ino.
3108  * This is used to determine the file type when neither its dir_index/item or
3109  * inode_item exists.
3110  *
3111  * This will *NOT* report error, if any error happens, just consider it does
3112  * not have any normal file extent.
3113  */
3114 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3115 {
3116         struct btrfs_path path;
3117         struct btrfs_key key;
3118         struct btrfs_key found_key;
3119         struct btrfs_file_extent_item *fi;
3120         u8 type;
3121         int ret = 0;
3122
3123         btrfs_init_path(&path);
3124         key.objectid = ino;
3125         key.type = BTRFS_EXTENT_DATA_KEY;
3126         key.offset = 0;
3127
3128         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3129         if (ret < 0) {
3130                 ret = 0;
3131                 goto out;
3132         }
3133         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3134                 ret = btrfs_next_leaf(root, &path);
3135                 if (ret) {
3136                         ret = 0;
3137                         goto out;
3138                 }
3139         }
3140         while (1) {
3141                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3142                                       path.slots[0]);
3143                 if (found_key.objectid != ino ||
3144                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3145                         break;
3146                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3147                                     struct btrfs_file_extent_item);
3148                 type = btrfs_file_extent_type(path.nodes[0], fi);
3149                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3150                         ret = 1;
3151                         goto out;
3152                 }
3153         }
3154 out:
3155         btrfs_release_path(&path);
3156         return ret;
3157 }
3158
3159 static u32 btrfs_type_to_imode(u8 type)
3160 {
3161         static u32 imode_by_btrfs_type[] = {
3162                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3163                 [BTRFS_FT_DIR]          = S_IFDIR,
3164                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3165                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3166                 [BTRFS_FT_FIFO]         = S_IFIFO,
3167                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3168                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3169         };
3170
3171         return imode_by_btrfs_type[(type)];
3172 }
3173
3174 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3175                                 struct btrfs_root *root,
3176                                 struct btrfs_path *path,
3177                                 struct inode_record *rec)
3178 {
3179         u8 filetype;
3180         u32 mode = 0700;
3181         int type_recovered = 0;
3182         int ret = 0;
3183
3184         printf("Trying to rebuild inode:%llu\n", rec->ino);
3185
3186         type_recovered = !find_file_type(rec, &filetype);
3187
3188         /*
3189          * Try to determine inode type if type not found.
3190          *
3191          * For found regular file extent, it must be FILE.
3192          * For found dir_item/index, it must be DIR.
3193          *
3194          * For undetermined one, use FILE as fallback.
3195          *
3196          * TODO:
3197          * 1. If found backref(inode_index/item is already handled) to it,
3198          *    it must be DIR.
3199          *    Need new inode-inode ref structure to allow search for that.
3200          */
3201         if (!type_recovered) {
3202                 if (rec->found_file_extent &&
3203                     find_normal_file_extent(root, rec->ino)) {
3204                         type_recovered = 1;
3205                         filetype = BTRFS_FT_REG_FILE;
3206                 } else if (rec->found_dir_item) {
3207                         type_recovered = 1;
3208                         filetype = BTRFS_FT_DIR;
3209                 } else if (!list_empty(&rec->orphan_extents)) {
3210                         type_recovered = 1;
3211                         filetype = BTRFS_FT_REG_FILE;
3212                 } else{
3213                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3214                                rec->ino);
3215                         type_recovered = 1;
3216                         filetype = BTRFS_FT_REG_FILE;
3217                 }
3218         }
3219
3220         ret = btrfs_new_inode(trans, root, rec->ino,
3221                               mode | btrfs_type_to_imode(filetype));
3222         if (ret < 0)
3223                 goto out;
3224
3225         /*
3226          * Here inode rebuild is done, we only rebuild the inode item,
3227          * don't repair the nlink(like move to lost+found).
3228          * That is the job of nlink repair.
3229          *
3230          * We just fill the record and return
3231          */
3232         rec->found_dir_item = 1;
3233         rec->imode = mode | btrfs_type_to_imode(filetype);
3234         rec->nlink = 0;
3235         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3236         /* Ensure the inode_nlinks repair function will be called */
3237         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3238 out:
3239         return ret;
3240 }
3241
3242 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3243                                       struct btrfs_root *root,
3244                                       struct btrfs_path *path,
3245                                       struct inode_record *rec)
3246 {
3247         struct orphan_data_extent *orphan;
3248         struct orphan_data_extent *tmp;
3249         int ret = 0;
3250
3251         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3252                 /*
3253                  * Check for conflicting file extents
3254                  *
3255                  * Here we don't know whether the extents is compressed or not,
3256                  * so we can only assume it not compressed nor data offset,
3257                  * and use its disk_len as extent length.
3258                  */
3259                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3260                                        orphan->offset, orphan->disk_len, 0);
3261                 btrfs_release_path(path);
3262                 if (ret < 0)
3263                         goto out;
3264                 if (!ret) {
3265                         fprintf(stderr,
3266                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3267                                 orphan->disk_bytenr, orphan->disk_len);
3268                         ret = btrfs_free_extent(trans,
3269                                         root->fs_info->extent_root,
3270                                         orphan->disk_bytenr, orphan->disk_len,
3271                                         0, root->objectid, orphan->objectid,
3272                                         orphan->offset);
3273                         if (ret < 0)
3274                                 goto out;
3275                 }
3276                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3277                                 orphan->offset, orphan->disk_bytenr,
3278                                 orphan->disk_len, orphan->disk_len);
3279                 if (ret < 0)
3280                         goto out;
3281
3282                 /* Update file size info */
3283                 rec->found_size += orphan->disk_len;
3284                 if (rec->found_size == rec->nbytes)
3285                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3286
3287                 /* Update the file extent hole info too */
3288                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3289                                            orphan->disk_len);
3290                 if (ret < 0)
3291                         goto out;
3292                 if (RB_EMPTY_ROOT(&rec->holes))
3293                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3294
3295                 list_del(&orphan->list);
3296                 free(orphan);
3297         }
3298         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3299 out:
3300         return ret;
3301 }
3302
3303 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3304                                         struct btrfs_root *root,
3305                                         struct btrfs_path *path,
3306                                         struct inode_record *rec)
3307 {
3308         struct rb_node *node;
3309         struct file_extent_hole *hole;
3310         int found = 0;
3311         int ret = 0;
3312
3313         node = rb_first(&rec->holes);
3314
3315         while (node) {
3316                 found = 1;
3317                 hole = rb_entry(node, struct file_extent_hole, node);
3318                 ret = btrfs_punch_hole(trans, root, rec->ino,
3319                                        hole->start, hole->len);
3320                 if (ret < 0)
3321                         goto out;
3322                 ret = del_file_extent_hole(&rec->holes, hole->start,
3323                                            hole->len);
3324                 if (ret < 0)
3325                         goto out;
3326                 if (RB_EMPTY_ROOT(&rec->holes))
3327                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3328                 node = rb_first(&rec->holes);
3329         }
3330         /* special case for a file losing all its file extent */
3331         if (!found) {
3332                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3333                                        round_up(rec->isize,
3334                                                 root->fs_info->sectorsize));
3335                 if (ret < 0)
3336                         goto out;
3337         }
3338         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3339                rec->ino, root->objectid);
3340 out:
3341         return ret;
3342 }
3343
3344 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3345 {
3346         struct btrfs_trans_handle *trans;
3347         struct btrfs_path path;
3348         int ret = 0;
3349
3350         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3351                              I_ERR_NO_ORPHAN_ITEM |
3352                              I_ERR_LINK_COUNT_WRONG |
3353                              I_ERR_NO_INODE_ITEM |
3354                              I_ERR_FILE_EXTENT_ORPHAN |
3355                              I_ERR_FILE_EXTENT_DISCOUNT|
3356                              I_ERR_FILE_NBYTES_WRONG)))
3357                 return rec->errors;
3358
3359         /*
3360          * For nlink repair, it may create a dir and add link, so
3361          * 2 for parent(256)'s dir_index and dir_item
3362          * 2 for lost+found dir's inode_item and inode_ref
3363          * 1 for the new inode_ref of the file
3364          * 2 for lost+found dir's dir_index and dir_item for the file
3365          */
3366         trans = btrfs_start_transaction(root, 7);
3367         if (IS_ERR(trans))
3368                 return PTR_ERR(trans);
3369
3370         btrfs_init_path(&path);
3371         if (rec->errors & I_ERR_NO_INODE_ITEM)
3372                 ret = repair_inode_no_item(trans, root, &path, rec);
3373         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3374                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3375         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3376                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3377         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3378                 ret = repair_inode_isize(trans, root, &path, rec);
3379         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3380                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3381         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3382                 ret = repair_inode_nlinks(trans, root, &path, rec);
3383         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3384                 ret = repair_inode_nbytes(trans, root, &path, rec);
3385         btrfs_commit_transaction(trans, root);
3386         btrfs_release_path(&path);
3387         return ret;
3388 }
3389
3390 static int check_inode_recs(struct btrfs_root *root,
3391                             struct cache_tree *inode_cache)
3392 {
3393         struct cache_extent *cache;
3394         struct ptr_node *node;
3395         struct inode_record *rec;
3396         struct inode_backref *backref;
3397         int stage = 0;
3398         int ret = 0;
3399         int err = 0;
3400         u64 error = 0;
3401         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3402
3403         if (btrfs_root_refs(&root->root_item) == 0) {
3404                 if (!cache_tree_empty(inode_cache))
3405                         fprintf(stderr, "warning line %d\n", __LINE__);
3406                 return 0;
3407         }
3408
3409         /*
3410          * We need to repair backrefs first because we could change some of the
3411          * errors in the inode recs.
3412          *
3413          * We also need to go through and delete invalid backrefs first and then
3414          * add the correct ones second.  We do this because we may get EEXIST
3415          * when adding back the correct index because we hadn't yet deleted the
3416          * invalid index.
3417          *
3418          * For example, if we were missing a dir index then the directories
3419          * isize would be wrong, so if we fixed the isize to what we thought it
3420          * would be and then fixed the backref we'd still have a invalid fs, so
3421          * we need to add back the dir index and then check to see if the isize
3422          * is still wrong.
3423          */
3424         while (stage < 3) {
3425                 stage++;
3426                 if (stage == 3 && !err)
3427                         break;
3428
3429                 cache = search_cache_extent(inode_cache, 0);
3430                 while (repair && cache) {
3431                         node = container_of(cache, struct ptr_node, cache);
3432                         rec = node->data;
3433                         cache = next_cache_extent(cache);
3434
3435                         /* Need to free everything up and rescan */
3436                         if (stage == 3) {
3437                                 remove_cache_extent(inode_cache, &node->cache);
3438                                 free(node);
3439                                 free_inode_rec(rec);
3440                                 continue;
3441                         }
3442
3443                         if (list_empty(&rec->backrefs))
3444                                 continue;
3445
3446                         ret = repair_inode_backrefs(root, rec, inode_cache,
3447                                                     stage == 1);
3448                         if (ret < 0) {
3449                                 err = ret;
3450                                 stage = 2;
3451                                 break;
3452                         } if (ret > 0) {
3453                                 err = -EAGAIN;
3454                         }
3455                 }
3456         }
3457         if (err)
3458                 return err;
3459
3460         rec = get_inode_rec(inode_cache, root_dirid, 0);
3461         BUG_ON(IS_ERR(rec));
3462         if (rec) {
3463                 ret = check_root_dir(rec);
3464                 if (ret) {
3465                         fprintf(stderr, "root %llu root dir %llu error\n",
3466                                 (unsigned long long)root->root_key.objectid,
3467                                 (unsigned long long)root_dirid);
3468                         print_inode_error(root, rec);
3469                         error++;
3470                 }
3471         } else {
3472                 if (repair) {
3473                         struct btrfs_trans_handle *trans;
3474
3475                         trans = btrfs_start_transaction(root, 1);
3476                         if (IS_ERR(trans)) {
3477                                 err = PTR_ERR(trans);
3478                                 return err;
3479                         }
3480
3481                         fprintf(stderr,
3482                                 "root %llu missing its root dir, recreating\n",
3483                                 (unsigned long long)root->objectid);
3484
3485                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3486                         BUG_ON(ret);
3487
3488                         btrfs_commit_transaction(trans, root);
3489                         return -EAGAIN;
3490                 }
3491
3492                 fprintf(stderr, "root %llu root dir %llu not found\n",
3493                         (unsigned long long)root->root_key.objectid,
3494                         (unsigned long long)root_dirid);
3495         }
3496
3497         while (1) {
3498                 cache = search_cache_extent(inode_cache, 0);
3499                 if (!cache)
3500                         break;
3501                 node = container_of(cache, struct ptr_node, cache);
3502                 rec = node->data;
3503                 remove_cache_extent(inode_cache, &node->cache);
3504                 free(node);
3505                 if (rec->ino == root_dirid ||
3506                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3507                         free_inode_rec(rec);
3508                         continue;
3509                 }
3510
3511                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3512                         ret = check_orphan_item(root, rec->ino);
3513                         if (ret == 0)
3514                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3515                         if (can_free_inode_rec(rec)) {
3516                                 free_inode_rec(rec);
3517                                 continue;
3518                         }
3519                 }
3520
3521                 if (!rec->found_inode_item)
3522                         rec->errors |= I_ERR_NO_INODE_ITEM;
3523                 if (rec->found_link != rec->nlink)
3524                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3525                 if (repair) {
3526                         ret = try_repair_inode(root, rec);
3527                         if (ret == 0 && can_free_inode_rec(rec)) {
3528                                 free_inode_rec(rec);
3529                                 continue;
3530                         }
3531                         ret = 0;
3532                 }
3533
3534                 if (!(repair && ret == 0))
3535                         error++;
3536                 print_inode_error(root, rec);
3537                 list_for_each_entry(backref, &rec->backrefs, list) {
3538                         if (!backref->found_dir_item)
3539                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3540                         if (!backref->found_dir_index)
3541                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3542                         if (!backref->found_inode_ref)
3543                                 backref->errors |= REF_ERR_NO_INODE_REF;
3544                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3545                                 " namelen %u name %s filetype %d errors %x",
3546                                 (unsigned long long)backref->dir,
3547                                 (unsigned long long)backref->index,
3548                                 backref->namelen, backref->name,
3549                                 backref->filetype, backref->errors);
3550                         print_ref_error(backref->errors);
3551                 }
3552                 free_inode_rec(rec);
3553         }
3554         return (error > 0) ? -1 : 0;
3555 }
3556
3557 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3558                                         u64 objectid)
3559 {
3560         struct cache_extent *cache;
3561         struct root_record *rec = NULL;
3562         int ret;
3563
3564         cache = lookup_cache_extent(root_cache, objectid, 1);
3565         if (cache) {
3566                 rec = container_of(cache, struct root_record, cache);
3567         } else {
3568                 rec = calloc(1, sizeof(*rec));
3569                 if (!rec)
3570                         return ERR_PTR(-ENOMEM);
3571                 rec->objectid = objectid;
3572                 INIT_LIST_HEAD(&rec->backrefs);
3573                 rec->cache.start = objectid;
3574                 rec->cache.size = 1;
3575
3576                 ret = insert_cache_extent(root_cache, &rec->cache);
3577                 if (ret)
3578                         return ERR_PTR(-EEXIST);
3579         }
3580         return rec;
3581 }
3582
3583 static struct root_backref *get_root_backref(struct root_record *rec,
3584                                              u64 ref_root, u64 dir, u64 index,
3585                                              const char *name, int namelen)
3586 {
3587         struct root_backref *backref;
3588
3589         list_for_each_entry(backref, &rec->backrefs, list) {
3590                 if (backref->ref_root != ref_root || backref->dir != dir ||
3591                     backref->namelen != namelen)
3592                         continue;
3593                 if (memcmp(name, backref->name, namelen))
3594                         continue;
3595                 return backref;
3596         }
3597
3598         backref = calloc(1, sizeof(*backref) + namelen + 1);
3599         if (!backref)
3600                 return NULL;
3601         backref->ref_root = ref_root;
3602         backref->dir = dir;
3603         backref->index = index;
3604         backref->namelen = namelen;
3605         memcpy(backref->name, name, namelen);
3606         backref->name[namelen] = '\0';
3607         list_add_tail(&backref->list, &rec->backrefs);
3608         return backref;
3609 }
3610
3611 static void free_root_record(struct cache_extent *cache)
3612 {
3613         struct root_record *rec;
3614         struct root_backref *backref;
3615
3616         rec = container_of(cache, struct root_record, cache);
3617         while (!list_empty(&rec->backrefs)) {
3618                 backref = to_root_backref(rec->backrefs.next);
3619                 list_del(&backref->list);
3620                 free(backref);
3621         }
3622
3623         free(rec);
3624 }
3625
3626 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3627
3628 static int add_root_backref(struct cache_tree *root_cache,
3629                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3630                             const char *name, int namelen,
3631                             int item_type, int errors)
3632 {
3633         struct root_record *rec;
3634         struct root_backref *backref;
3635
3636         rec = get_root_rec(root_cache, root_id);
3637         BUG_ON(IS_ERR(rec));
3638         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3639         BUG_ON(!backref);
3640
3641         backref->errors |= errors;
3642
3643         if (item_type != BTRFS_DIR_ITEM_KEY) {
3644                 if (backref->found_dir_index || backref->found_back_ref ||
3645                     backref->found_forward_ref) {
3646                         if (backref->index != index)
3647                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3648                 } else {
3649                         backref->index = index;
3650                 }
3651         }
3652
3653         if (item_type == BTRFS_DIR_ITEM_KEY) {
3654                 if (backref->found_forward_ref)
3655                         rec->found_ref++;
3656                 backref->found_dir_item = 1;
3657         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3658                 backref->found_dir_index = 1;
3659         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3660                 if (backref->found_forward_ref)
3661                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3662                 else if (backref->found_dir_item)
3663                         rec->found_ref++;
3664                 backref->found_forward_ref = 1;
3665         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3666                 if (backref->found_back_ref)
3667                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3668                 backref->found_back_ref = 1;
3669         } else {
3670                 BUG_ON(1);
3671         }
3672
3673         if (backref->found_forward_ref && backref->found_dir_item)
3674                 backref->reachable = 1;
3675         return 0;
3676 }
3677
3678 static int merge_root_recs(struct btrfs_root *root,
3679                            struct cache_tree *src_cache,
3680                            struct cache_tree *dst_cache)
3681 {
3682         struct cache_extent *cache;
3683         struct ptr_node *node;
3684         struct inode_record *rec;
3685         struct inode_backref *backref;
3686         int ret = 0;
3687
3688         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3689                 free_inode_recs_tree(src_cache);
3690                 return 0;
3691         }
3692
3693         while (1) {
3694                 cache = search_cache_extent(src_cache, 0);
3695                 if (!cache)
3696                         break;
3697                 node = container_of(cache, struct ptr_node, cache);
3698                 rec = node->data;
3699                 remove_cache_extent(src_cache, &node->cache);
3700                 free(node);
3701
3702                 ret = is_child_root(root, root->objectid, rec->ino);
3703                 if (ret < 0)
3704                         break;
3705                 else if (ret == 0)
3706                         goto skip;
3707
3708                 list_for_each_entry(backref, &rec->backrefs, list) {
3709                         BUG_ON(backref->found_inode_ref);
3710                         if (backref->found_dir_item)
3711                                 add_root_backref(dst_cache, rec->ino,
3712                                         root->root_key.objectid, backref->dir,
3713                                         backref->index, backref->name,
3714                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3715                                         backref->errors);
3716                         if (backref->found_dir_index)
3717                                 add_root_backref(dst_cache, rec->ino,
3718                                         root->root_key.objectid, backref->dir,
3719                                         backref->index, backref->name,
3720                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3721                                         backref->errors);
3722                 }
3723 skip:
3724                 free_inode_rec(rec);
3725         }
3726         if (ret < 0)
3727                 return ret;
3728         return 0;
3729 }
3730
3731 static int check_root_refs(struct btrfs_root *root,
3732                            struct cache_tree *root_cache)
3733 {
3734         struct root_record *rec;
3735         struct root_record *ref_root;
3736         struct root_backref *backref;
3737         struct cache_extent *cache;
3738         int loop = 1;
3739         int ret;
3740         int error;
3741         int errors = 0;
3742
3743         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3744         BUG_ON(IS_ERR(rec));
3745         rec->found_ref = 1;
3746
3747         /* fixme: this can not detect circular references */
3748         while (loop) {
3749                 loop = 0;
3750                 cache = search_cache_extent(root_cache, 0);
3751                 while (1) {
3752                         if (!cache)
3753                                 break;
3754                         rec = container_of(cache, struct root_record, cache);
3755                         cache = next_cache_extent(cache);
3756
3757                         if (rec->found_ref == 0)
3758                                 continue;
3759
3760                         list_for_each_entry(backref, &rec->backrefs, list) {
3761                                 if (!backref->reachable)
3762                                         continue;
3763
3764                                 ref_root = get_root_rec(root_cache,
3765                                                         backref->ref_root);
3766                                 BUG_ON(IS_ERR(ref_root));
3767                                 if (ref_root->found_ref > 0)
3768                                         continue;
3769
3770                                 backref->reachable = 0;
3771                                 rec->found_ref--;
3772                                 if (rec->found_ref == 0)
3773                                         loop = 1;
3774                         }
3775                 }
3776         }
3777
3778         cache = search_cache_extent(root_cache, 0);
3779         while (1) {
3780                 if (!cache)
3781                         break;
3782                 rec = container_of(cache, struct root_record, cache);
3783                 cache = next_cache_extent(cache);
3784
3785                 if (rec->found_ref == 0 &&
3786                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3787                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3788                         ret = check_orphan_item(root->fs_info->tree_root,
3789                                                 rec->objectid);
3790                         if (ret == 0)
3791                                 continue;
3792
3793                         /*
3794                          * If we don't have a root item then we likely just have
3795                          * a dir item in a snapshot for this root but no actual
3796                          * ref key or anything so it's meaningless.
3797                          */
3798                         if (!rec->found_root_item)
3799                                 continue;
3800                         errors++;
3801                         fprintf(stderr, "fs tree %llu not referenced\n",
3802                                 (unsigned long long)rec->objectid);
3803                 }
3804
3805                 error = 0;
3806                 if (rec->found_ref > 0 && !rec->found_root_item)
3807                         error = 1;
3808                 list_for_each_entry(backref, &rec->backrefs, list) {
3809                         if (!backref->found_dir_item)
3810                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3811                         if (!backref->found_dir_index)
3812                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3813                         if (!backref->found_back_ref)
3814                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3815                         if (!backref->found_forward_ref)
3816                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3817                         if (backref->reachable && backref->errors)
3818                                 error = 1;
3819                 }
3820                 if (!error)
3821                         continue;
3822
3823                 errors++;
3824                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3825                         (unsigned long long)rec->objectid, rec->found_ref,
3826                          rec->found_root_item ? "" : "not found");
3827
3828                 list_for_each_entry(backref, &rec->backrefs, list) {
3829                         if (!backref->reachable)
3830                                 continue;
3831                         if (!backref->errors && rec->found_root_item)
3832                                 continue;
3833                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3834                                 " index %llu namelen %u name %s errors %x\n",
3835                                 (unsigned long long)backref->ref_root,
3836                                 (unsigned long long)backref->dir,
3837                                 (unsigned long long)backref->index,
3838                                 backref->namelen, backref->name,
3839                                 backref->errors);
3840                         print_ref_error(backref->errors);
3841                 }
3842         }
3843         return errors > 0 ? 1 : 0;
3844 }
3845
3846 static int process_root_ref(struct extent_buffer *eb, int slot,
3847                             struct btrfs_key *key,
3848                             struct cache_tree *root_cache)
3849 {
3850         u64 dirid;
3851         u64 index;
3852         u32 len;
3853         u32 name_len;
3854         struct btrfs_root_ref *ref;
3855         char namebuf[BTRFS_NAME_LEN];
3856         int error;
3857
3858         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3859
3860         dirid = btrfs_root_ref_dirid(eb, ref);
3861         index = btrfs_root_ref_sequence(eb, ref);
3862         name_len = btrfs_root_ref_name_len(eb, ref);
3863
3864         if (name_len <= BTRFS_NAME_LEN) {
3865                 len = name_len;
3866                 error = 0;
3867         } else {
3868                 len = BTRFS_NAME_LEN;
3869                 error = REF_ERR_NAME_TOO_LONG;
3870         }
3871         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3872
3873         if (key->type == BTRFS_ROOT_REF_KEY) {
3874                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3875                                  index, namebuf, len, key->type, error);
3876         } else {
3877                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3878                                  index, namebuf, len, key->type, error);
3879         }
3880         return 0;
3881 }
3882
3883 static void free_corrupt_block(struct cache_extent *cache)
3884 {
3885         struct btrfs_corrupt_block *corrupt;
3886
3887         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3888         free(corrupt);
3889 }
3890
3891 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3892
3893 /*
3894  * Repair the btree of the given root.
3895  *
3896  * The fix is to remove the node key in corrupt_blocks cache_tree.
3897  * and rebalance the tree.
3898  * After the fix, the btree should be writeable.
3899  */
3900 static int repair_btree(struct btrfs_root *root,
3901                         struct cache_tree *corrupt_blocks)
3902 {
3903         struct btrfs_trans_handle *trans;
3904         struct btrfs_path path;
3905         struct btrfs_corrupt_block *corrupt;
3906         struct cache_extent *cache;
3907         struct btrfs_key key;
3908         u64 offset;
3909         int level;
3910         int ret = 0;
3911
3912         if (cache_tree_empty(corrupt_blocks))
3913                 return 0;
3914
3915         trans = btrfs_start_transaction(root, 1);
3916         if (IS_ERR(trans)) {
3917                 ret = PTR_ERR(trans);
3918                 fprintf(stderr, "Error starting transaction: %s\n",
3919                         strerror(-ret));
3920                 return ret;
3921         }
3922         btrfs_init_path(&path);
3923         cache = first_cache_extent(corrupt_blocks);
3924         while (cache) {
3925                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3926                                        cache);
3927                 level = corrupt->level;
3928                 path.lowest_level = level;
3929                 key.objectid = corrupt->key.objectid;
3930                 key.type = corrupt->key.type;
3931                 key.offset = corrupt->key.offset;
3932
3933                 /*
3934                  * Here we don't want to do any tree balance, since it may
3935                  * cause a balance with corrupted brother leaf/node,
3936                  * so ins_len set to 0 here.
3937                  * Balance will be done after all corrupt node/leaf is deleted.
3938                  */
3939                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3940                 if (ret < 0)
3941                         goto out;
3942                 offset = btrfs_node_blockptr(path.nodes[level],
3943                                              path.slots[level]);
3944
3945                 /* Remove the ptr */
3946                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3947                 if (ret < 0)
3948                         goto out;
3949                 /*
3950                  * Remove the corresponding extent
3951                  * return value is not concerned.
3952                  */
3953                 btrfs_release_path(&path);
3954                 ret = btrfs_free_extent(trans, root, offset,
3955                                 root->fs_info->nodesize, 0,
3956                                 root->root_key.objectid, level - 1, 0);
3957                 cache = next_cache_extent(cache);
3958         }
3959
3960         /* Balance the btree using btrfs_search_slot() */
3961         cache = first_cache_extent(corrupt_blocks);
3962         while (cache) {
3963                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3964                                        cache);
3965                 memcpy(&key, &corrupt->key, sizeof(key));
3966                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3967                 if (ret < 0)
3968                         goto out;
3969                 /* return will always >0 since it won't find the item */
3970                 ret = 0;
3971                 btrfs_release_path(&path);
3972                 cache = next_cache_extent(cache);
3973         }
3974 out:
3975         btrfs_commit_transaction(trans, root);
3976         btrfs_release_path(&path);
3977         return ret;
3978 }
3979
3980 static int check_fs_root(struct btrfs_root *root,
3981                          struct cache_tree *root_cache,
3982                          struct walk_control *wc)
3983 {
3984         int ret = 0;
3985         int err = 0;
3986         int wret;
3987         int level;
3988         struct btrfs_path path;
3989         struct shared_node root_node;
3990         struct root_record *rec;
3991         struct btrfs_root_item *root_item = &root->root_item;
3992         struct cache_tree corrupt_blocks;
3993         struct orphan_data_extent *orphan;
3994         struct orphan_data_extent *tmp;
3995         enum btrfs_tree_block_status status;
3996         struct node_refs nrefs;
3997
3998         /*
3999          * Reuse the corrupt_block cache tree to record corrupted tree block
4000          *
4001          * Unlike the usage in extent tree check, here we do it in a per
4002          * fs/subvol tree base.
4003          */
4004         cache_tree_init(&corrupt_blocks);
4005         root->fs_info->corrupt_blocks = &corrupt_blocks;
4006
4007         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4008                 rec = get_root_rec(root_cache, root->root_key.objectid);
4009                 BUG_ON(IS_ERR(rec));
4010                 if (btrfs_root_refs(root_item) > 0)
4011                         rec->found_root_item = 1;
4012         }
4013
4014         btrfs_init_path(&path);
4015         memset(&root_node, 0, sizeof(root_node));
4016         cache_tree_init(&root_node.root_cache);
4017         cache_tree_init(&root_node.inode_cache);
4018         memset(&nrefs, 0, sizeof(nrefs));
4019
4020         /* Move the orphan extent record to corresponding inode_record */
4021         list_for_each_entry_safe(orphan, tmp,
4022                                  &root->orphan_data_extents, list) {
4023                 struct inode_record *inode;
4024
4025                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4026                                       1);
4027                 BUG_ON(IS_ERR(inode));
4028                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4029                 list_move(&orphan->list, &inode->orphan_extents);
4030         }
4031
4032         level = btrfs_header_level(root->node);
4033         memset(wc->nodes, 0, sizeof(wc->nodes));
4034         wc->nodes[level] = &root_node;
4035         wc->active_node = level;
4036         wc->root_level = level;
4037
4038         /* We may not have checked the root block, lets do that now */
4039         if (btrfs_is_leaf(root->node))
4040                 status = btrfs_check_leaf(root, NULL, root->node);
4041         else
4042                 status = btrfs_check_node(root, NULL, root->node);
4043         if (status != BTRFS_TREE_BLOCK_CLEAN)
4044                 return -EIO;
4045
4046         if (btrfs_root_refs(root_item) > 0 ||
4047             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4048                 path.nodes[level] = root->node;
4049                 extent_buffer_get(root->node);
4050                 path.slots[level] = 0;
4051         } else {
4052                 struct btrfs_key key;
4053                 struct btrfs_disk_key found_key;
4054
4055                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4056                 level = root_item->drop_level;
4057                 path.lowest_level = level;
4058                 if (level > btrfs_header_level(root->node) ||
4059                     level >= BTRFS_MAX_LEVEL) {
4060                         error("ignoring invalid drop level: %u", level);
4061                         goto skip_walking;
4062                 }
4063                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4064                 if (wret < 0)
4065                         goto skip_walking;
4066                 btrfs_node_key(path.nodes[level], &found_key,
4067                                 path.slots[level]);
4068                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4069                                         sizeof(found_key)));
4070         }
4071
4072         while (1) {
4073                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4074                 if (wret < 0)
4075                         ret = wret;
4076                 if (wret != 0)
4077                         break;
4078
4079                 wret = walk_up_tree(root, &path, wc, &level);
4080                 if (wret < 0)
4081                         ret = wret;
4082                 if (wret != 0)
4083                         break;
4084         }
4085 skip_walking:
4086         btrfs_release_path(&path);
4087
4088         if (!cache_tree_empty(&corrupt_blocks)) {
4089                 struct cache_extent *cache;
4090                 struct btrfs_corrupt_block *corrupt;
4091
4092                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4093                        root->root_key.objectid);
4094                 cache = first_cache_extent(&corrupt_blocks);
4095                 while (cache) {
4096                         corrupt = container_of(cache,
4097                                                struct btrfs_corrupt_block,
4098                                                cache);
4099                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4100                                cache->start, corrupt->level,
4101                                corrupt->key.objectid, corrupt->key.type,
4102                                corrupt->key.offset);
4103                         cache = next_cache_extent(cache);
4104                 }
4105                 if (repair) {
4106                         printf("Try to repair the btree for root %llu\n",
4107                                root->root_key.objectid);
4108                         ret = repair_btree(root, &corrupt_blocks);
4109                         if (ret < 0)
4110                                 fprintf(stderr, "Failed to repair btree: %s\n",
4111                                         strerror(-ret));
4112                         if (!ret)
4113                                 printf("Btree for root %llu is fixed\n",
4114                                        root->root_key.objectid);
4115                 }
4116         }
4117
4118         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4119         if (err < 0)
4120                 ret = err;
4121
4122         if (root_node.current) {
4123                 root_node.current->checked = 1;
4124                 maybe_free_inode_rec(&root_node.inode_cache,
4125                                 root_node.current);
4126         }
4127
4128         err = check_inode_recs(root, &root_node.inode_cache);
4129         if (!ret)
4130                 ret = err;
4131
4132         free_corrupt_blocks_tree(&corrupt_blocks);
4133         root->fs_info->corrupt_blocks = NULL;
4134         free_orphan_data_extents(&root->orphan_data_extents);
4135         return ret;
4136 }
4137
4138 static int fs_root_objectid(u64 objectid)
4139 {
4140         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4141             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4142                 return 1;
4143         return is_fstree(objectid);
4144 }
4145
4146 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4147                           struct cache_tree *root_cache)
4148 {
4149         struct btrfs_path path;
4150         struct btrfs_key key;
4151         struct walk_control wc;
4152         struct extent_buffer *leaf, *tree_node;
4153         struct btrfs_root *tmp_root;
4154         struct btrfs_root *tree_root = fs_info->tree_root;
4155         int ret;
4156         int err = 0;
4157
4158         if (ctx.progress_enabled) {
4159                 ctx.tp = TASK_FS_ROOTS;
4160                 task_start(ctx.info);
4161         }
4162
4163         /*
4164          * Just in case we made any changes to the extent tree that weren't
4165          * reflected into the free space cache yet.
4166          */
4167         if (repair)
4168                 reset_cached_block_groups(fs_info);
4169         memset(&wc, 0, sizeof(wc));
4170         cache_tree_init(&wc.shared);
4171         btrfs_init_path(&path);
4172
4173 again:
4174         key.offset = 0;
4175         key.objectid = 0;
4176         key.type = BTRFS_ROOT_ITEM_KEY;
4177         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4178         if (ret < 0) {
4179                 err = 1;
4180                 goto out;
4181         }
4182         tree_node = tree_root->node;
4183         while (1) {
4184                 if (tree_node != tree_root->node) {
4185                         free_root_recs_tree(root_cache);
4186                         btrfs_release_path(&path);
4187                         goto again;
4188                 }
4189                 leaf = path.nodes[0];
4190                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4191                         ret = btrfs_next_leaf(tree_root, &path);
4192                         if (ret) {
4193                                 if (ret < 0)
4194                                         err = 1;
4195                                 break;
4196                         }
4197                         leaf = path.nodes[0];
4198                 }
4199                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4200                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4201                     fs_root_objectid(key.objectid)) {
4202                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4203                                 tmp_root = btrfs_read_fs_root_no_cache(
4204                                                 fs_info, &key);
4205                         } else {
4206                                 key.offset = (u64)-1;
4207                                 tmp_root = btrfs_read_fs_root(
4208                                                 fs_info, &key);
4209                         }
4210                         if (IS_ERR(tmp_root)) {
4211                                 err = 1;
4212                                 goto next;
4213                         }
4214                         ret = check_fs_root(tmp_root, root_cache, &wc);
4215                         if (ret == -EAGAIN) {
4216                                 free_root_recs_tree(root_cache);
4217                                 btrfs_release_path(&path);
4218                                 goto again;
4219                         }
4220                         if (ret)
4221                                 err = 1;
4222                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4223                                 btrfs_free_fs_root(tmp_root);
4224                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4225                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4226                         process_root_ref(leaf, path.slots[0], &key,
4227                                          root_cache);
4228                 }
4229 next:
4230                 path.slots[0]++;
4231         }
4232 out:
4233         btrfs_release_path(&path);
4234         if (err)
4235                 free_extent_cache_tree(&wc.shared);
4236         if (!cache_tree_empty(&wc.shared))
4237                 fprintf(stderr, "warning line %d\n", __LINE__);
4238
4239         task_stop(ctx.info);
4240
4241         return err;
4242 }
4243
4244 /*
4245  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4246  * INODE_REF/INODE_EXTREF match.
4247  *
4248  * @root:       the root of the fs/file tree
4249  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4250  * @key:        the key of the DIR_ITEM/DIR_INDEX
4251  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4252  *              distinguish root_dir between normal dir/file
4253  * @name:       the name in the INODE_REF/INODE_EXTREF
4254  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4255  * @mode:       the st_mode of INODE_ITEM
4256  *
4257  * Return 0 if no error occurred.
4258  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4259  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4260  * dir/file.
4261  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4262  * not match for normal dir/file.
4263  */
4264 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4265                          struct btrfs_key *key, u64 index, char *name,
4266                          u32 namelen, u32 mode)
4267 {
4268         struct btrfs_path path;
4269         struct extent_buffer *node;
4270         struct btrfs_dir_item *di;
4271         struct btrfs_key location;
4272         char namebuf[BTRFS_NAME_LEN] = {0};
4273         u32 total;
4274         u32 cur = 0;
4275         u32 len;
4276         u32 name_len;
4277         u32 data_len;
4278         u8 filetype;
4279         int slot;
4280         int ret;
4281
4282         btrfs_init_path(&path);
4283         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4284         if (ret < 0) {
4285                 ret = DIR_ITEM_MISSING;
4286                 goto out;
4287         }
4288
4289         /* Process root dir and goto out*/
4290         if (index == 0) {
4291                 if (ret == 0) {
4292                         ret = ROOT_DIR_ERROR;
4293                         error(
4294                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4295                                 root->objectid,
4296                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4297                                         "REF" : "EXTREF",
4298                                 ref_key->objectid, ref_key->offset,
4299                                 key->type == BTRFS_DIR_ITEM_KEY ?
4300                                         "DIR_ITEM" : "DIR_INDEX");
4301                 } else {
4302                         ret = 0;
4303                 }
4304
4305                 goto out;
4306         }
4307
4308         /* Process normal file/dir */
4309         if (ret > 0) {
4310                 ret = DIR_ITEM_MISSING;
4311                 error(
4312                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4313                         root->objectid,
4314                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4315                         ref_key->objectid, ref_key->offset,
4316                         key->type == BTRFS_DIR_ITEM_KEY ?
4317                                 "DIR_ITEM" : "DIR_INDEX",
4318                         key->objectid, key->offset, namelen, name,
4319                         imode_to_type(mode));
4320                 goto out;
4321         }
4322
4323         /* Check whether inode_id/filetype/name match */
4324         node = path.nodes[0];
4325         slot = path.slots[0];
4326         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4327         total = btrfs_item_size_nr(node, slot);
4328         while (cur < total) {
4329                 ret = DIR_ITEM_MISMATCH;
4330                 name_len = btrfs_dir_name_len(node, di);
4331                 data_len = btrfs_dir_data_len(node, di);
4332
4333                 btrfs_dir_item_key_to_cpu(node, di, &location);
4334                 if (location.objectid != ref_key->objectid ||
4335                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4336                     location.offset != 0)
4337                         goto next;
4338
4339                 filetype = btrfs_dir_type(node, di);
4340                 if (imode_to_type(mode) != filetype)
4341                         goto next;
4342
4343                 if (cur + sizeof(*di) + name_len > total ||
4344                     name_len > BTRFS_NAME_LEN) {
4345                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4346                                 root->objectid,
4347                                 key->type == BTRFS_DIR_ITEM_KEY ?
4348                                 "DIR_ITEM" : "DIR_INDEX",
4349                                 key->objectid, key->offset, name_len);
4350
4351                         if (cur + sizeof(*di) > total)
4352                                 break;
4353                         len = min_t(u32, total - cur - sizeof(*di),
4354                                     BTRFS_NAME_LEN);
4355                 } else {
4356                         len = name_len;
4357                 }
4358
4359                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4360                 if (len != namelen || strncmp(namebuf, name, len))
4361                         goto next;
4362
4363                 ret = 0;
4364                 goto out;
4365 next:
4366                 len = sizeof(*di) + name_len + data_len;
4367                 di = (struct btrfs_dir_item *)((char *)di + len);
4368                 cur += len;
4369         }
4370         if (ret == DIR_ITEM_MISMATCH)
4371                 error(
4372                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4373                         root->objectid,
4374                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4375                         ref_key->objectid, ref_key->offset,
4376                         key->type == BTRFS_DIR_ITEM_KEY ?
4377                                 "DIR_ITEM" : "DIR_INDEX",
4378                         key->objectid, key->offset, namelen, name,
4379                         imode_to_type(mode));
4380 out:
4381         btrfs_release_path(&path);
4382         return ret;
4383 }
4384
4385 /*
4386  * Traverse the given INODE_REF and call find_dir_item() to find related
4387  * DIR_ITEM/DIR_INDEX.
4388  *
4389  * @root:       the root of the fs/file tree
4390  * @ref_key:    the key of the INODE_REF
4391  * @refs:       the count of INODE_REF
4392  * @mode:       the st_mode of INODE_ITEM
4393  *
4394  * Return 0 if no error occurred.
4395  */
4396 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4397                            struct extent_buffer *node, int slot, u64 *refs,
4398                            int mode)
4399 {
4400         struct btrfs_key key;
4401         struct btrfs_inode_ref *ref;
4402         char namebuf[BTRFS_NAME_LEN] = {0};
4403         u32 total;
4404         u32 cur = 0;
4405         u32 len;
4406         u32 name_len;
4407         u64 index;
4408         int ret, err = 0;
4409
4410         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4411         total = btrfs_item_size_nr(node, slot);
4412
4413 next:
4414         /* Update inode ref count */
4415         (*refs)++;
4416
4417         index = btrfs_inode_ref_index(node, ref);
4418         name_len = btrfs_inode_ref_name_len(node, ref);
4419         if (cur + sizeof(*ref) + name_len > total ||
4420             name_len > BTRFS_NAME_LEN) {
4421                 warning("root %llu INODE_REF[%llu %llu] name too long",
4422                         root->objectid, ref_key->objectid, ref_key->offset);
4423
4424                 if (total < cur + sizeof(*ref))
4425                         goto out;
4426                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4427         } else {
4428                 len = name_len;
4429         }
4430
4431         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4432
4433         /* Check root dir ref name */
4434         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4435                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4436                       root->objectid, ref_key->objectid, ref_key->offset,
4437                       namebuf);
4438                 err |= ROOT_DIR_ERROR;
4439         }
4440
4441         /* Find related DIR_INDEX */
4442         key.objectid = ref_key->offset;
4443         key.type = BTRFS_DIR_INDEX_KEY;
4444         key.offset = index;
4445         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4446         err |= ret;
4447
4448         /* Find related dir_item */
4449         key.objectid = ref_key->offset;
4450         key.type = BTRFS_DIR_ITEM_KEY;
4451         key.offset = btrfs_name_hash(namebuf, len);
4452         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4453         err |= ret;
4454
4455         len = sizeof(*ref) + name_len;
4456         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4457         cur += len;
4458         if (cur < total)
4459                 goto next;
4460
4461 out:
4462         return err;
4463 }
4464
4465 /*
4466  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4467  * DIR_ITEM/DIR_INDEX.
4468  *
4469  * @root:       the root of the fs/file tree
4470  * @ref_key:    the key of the INODE_EXTREF
4471  * @refs:       the count of INODE_EXTREF
4472  * @mode:       the st_mode of INODE_ITEM
4473  *
4474  * Return 0 if no error occurred.
4475  */
4476 static int check_inode_extref(struct btrfs_root *root,
4477                               struct btrfs_key *ref_key,
4478                               struct extent_buffer *node, int slot, u64 *refs,
4479                               int mode)
4480 {
4481         struct btrfs_key key;
4482         struct btrfs_inode_extref *extref;
4483         char namebuf[BTRFS_NAME_LEN] = {0};
4484         u32 total;
4485         u32 cur = 0;
4486         u32 len;
4487         u32 name_len;
4488         u64 index;
4489         u64 parent;
4490         int ret;
4491         int err = 0;
4492
4493         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4494         total = btrfs_item_size_nr(node, slot);
4495
4496 next:
4497         /* update inode ref count */
4498         (*refs)++;
4499         name_len = btrfs_inode_extref_name_len(node, extref);
4500         index = btrfs_inode_extref_index(node, extref);
4501         parent = btrfs_inode_extref_parent(node, extref);
4502         if (name_len <= BTRFS_NAME_LEN) {
4503                 len = name_len;
4504         } else {
4505                 len = BTRFS_NAME_LEN;
4506                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4507                         root->objectid, ref_key->objectid, ref_key->offset);
4508         }
4509         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4510
4511         /* Check root dir ref name */
4512         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4513                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4514                       root->objectid, ref_key->objectid, ref_key->offset,
4515                       namebuf);
4516                 err |= ROOT_DIR_ERROR;
4517         }
4518
4519         /* find related dir_index */
4520         key.objectid = parent;
4521         key.type = BTRFS_DIR_INDEX_KEY;
4522         key.offset = index;
4523         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4524         err |= ret;
4525
4526         /* find related dir_item */
4527         key.objectid = parent;
4528         key.type = BTRFS_DIR_ITEM_KEY;
4529         key.offset = btrfs_name_hash(namebuf, len);
4530         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4531         err |= ret;
4532
4533         len = sizeof(*extref) + name_len;
4534         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4535         cur += len;
4536
4537         if (cur < total)
4538                 goto next;
4539
4540         return err;
4541 }
4542
4543 /*
4544  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4545  * DIR_ITEM/DIR_INDEX match.
4546  *
4547  * @root:       the root of the fs/file tree
4548  * @key:        the key of the INODE_REF/INODE_EXTREF
4549  * @name:       the name in the INODE_REF/INODE_EXTREF
4550  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4551  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4552  * to (u64)-1
4553  * @ext_ref:    the EXTENDED_IREF feature
4554  *
4555  * Return 0 if no error occurred.
4556  * Return >0 for error bitmap
4557  */
4558 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4559                           char *name, int namelen, u64 index,
4560                           unsigned int ext_ref)
4561 {
4562         struct btrfs_path path;
4563         struct btrfs_inode_ref *ref;
4564         struct btrfs_inode_extref *extref;
4565         struct extent_buffer *node;
4566         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4567         u32 total;
4568         u32 cur = 0;
4569         u32 len;
4570         u32 ref_namelen;
4571         u64 ref_index;
4572         u64 parent;
4573         u64 dir_id;
4574         int slot;
4575         int ret;
4576
4577         btrfs_init_path(&path);
4578         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4579         if (ret) {
4580                 ret = INODE_REF_MISSING;
4581                 goto extref;
4582         }
4583
4584         node = path.nodes[0];
4585         slot = path.slots[0];
4586
4587         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4588         total = btrfs_item_size_nr(node, slot);
4589
4590         /* Iterate all entry of INODE_REF */
4591         while (cur < total) {
4592                 ret = INODE_REF_MISSING;
4593
4594                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4595                 ref_index = btrfs_inode_ref_index(node, ref);
4596                 if (index != (u64)-1 && index != ref_index)
4597                         goto next_ref;
4598
4599                 if (cur + sizeof(*ref) + ref_namelen > total ||
4600                     ref_namelen > BTRFS_NAME_LEN) {
4601                         warning("root %llu INODE %s[%llu %llu] name too long",
4602                                 root->objectid,
4603                                 key->type == BTRFS_INODE_REF_KEY ?
4604                                         "REF" : "EXTREF",
4605                                 key->objectid, key->offset);
4606
4607                         if (cur + sizeof(*ref) > total)
4608                                 break;
4609                         len = min_t(u32, total - cur - sizeof(*ref),
4610                                     BTRFS_NAME_LEN);
4611                 } else {
4612                         len = ref_namelen;
4613                 }
4614
4615                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4616                                    len);
4617
4618                 if (len != namelen || strncmp(ref_namebuf, name, len))
4619                         goto next_ref;
4620
4621                 ret = 0;
4622                 goto out;
4623 next_ref:
4624                 len = sizeof(*ref) + ref_namelen;
4625                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4626                 cur += len;
4627         }
4628
4629 extref:
4630         /* Skip if not support EXTENDED_IREF feature */
4631         if (!ext_ref)
4632                 goto out;
4633
4634         btrfs_release_path(&path);
4635         btrfs_init_path(&path);
4636
4637         dir_id = key->offset;
4638         key->type = BTRFS_INODE_EXTREF_KEY;
4639         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4640
4641         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4642         if (ret) {
4643                 ret = INODE_REF_MISSING;
4644                 goto out;
4645         }
4646
4647         node = path.nodes[0];
4648         slot = path.slots[0];
4649
4650         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4651         cur = 0;
4652         total = btrfs_item_size_nr(node, slot);
4653
4654         /* Iterate all entry of INODE_EXTREF */
4655         while (cur < total) {
4656                 ret = INODE_REF_MISSING;
4657
4658                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4659                 ref_index = btrfs_inode_extref_index(node, extref);
4660                 parent = btrfs_inode_extref_parent(node, extref);
4661                 if (index != (u64)-1 && index != ref_index)
4662                         goto next_extref;
4663
4664                 if (parent != dir_id)
4665                         goto next_extref;
4666
4667                 if (ref_namelen <= BTRFS_NAME_LEN) {
4668                         len = ref_namelen;
4669                 } else {
4670                         len = BTRFS_NAME_LEN;
4671                         warning("root %llu INODE %s[%llu %llu] name too long",
4672                                 root->objectid,
4673                                 key->type == BTRFS_INODE_REF_KEY ?
4674                                         "REF" : "EXTREF",
4675                                 key->objectid, key->offset);
4676                 }
4677                 read_extent_buffer(node, ref_namebuf,
4678                                    (unsigned long)(extref + 1), len);
4679
4680                 if (len != namelen || strncmp(ref_namebuf, name, len))
4681                         goto next_extref;
4682
4683                 ret = 0;
4684                 goto out;
4685
4686 next_extref:
4687                 len = sizeof(*extref) + ref_namelen;
4688                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4689                 cur += len;
4690
4691         }
4692 out:
4693         btrfs_release_path(&path);
4694         return ret;
4695 }
4696
4697 /*
4698  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4699  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4700  *
4701  * @root:       the root of the fs/file tree
4702  * @key:        the key of the INODE_REF/INODE_EXTREF
4703  * @size:       the st_size of the INODE_ITEM
4704  * @ext_ref:    the EXTENDED_IREF feature
4705  *
4706  * Return 0 if no error occurred.
4707  */
4708 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4709                           struct extent_buffer *node, int slot, u64 *size,
4710                           unsigned int ext_ref)
4711 {
4712         struct btrfs_dir_item *di;
4713         struct btrfs_inode_item *ii;
4714         struct btrfs_path path;
4715         struct btrfs_key location;
4716         char namebuf[BTRFS_NAME_LEN] = {0};
4717         u32 total;
4718         u32 cur = 0;
4719         u32 len;
4720         u32 name_len;
4721         u32 data_len;
4722         u8 filetype;
4723         u32 mode;
4724         u64 index;
4725         int ret;
4726         int err = 0;
4727
4728         /*
4729          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4730          * ignore index check.
4731          */
4732         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4733
4734         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4735         total = btrfs_item_size_nr(node, slot);
4736
4737         while (cur < total) {
4738                 data_len = btrfs_dir_data_len(node, di);
4739                 if (data_len)
4740                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4741                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4742                               "DIR_ITEM" : "DIR_INDEX",
4743                               key->objectid, key->offset, data_len);
4744
4745                 name_len = btrfs_dir_name_len(node, di);
4746                 if (cur + sizeof(*di) + name_len > total ||
4747                     name_len > BTRFS_NAME_LEN) {
4748                         warning("root %llu %s[%llu %llu] name too long",
4749                                 root->objectid,
4750                                 key->type == BTRFS_DIR_ITEM_KEY ?
4751                                 "DIR_ITEM" : "DIR_INDEX",
4752                                 key->objectid, key->offset);
4753
4754                         if (cur + sizeof(*di) > total)
4755                                 break;
4756                         len = min_t(u32, total - cur - sizeof(*di),
4757                                     BTRFS_NAME_LEN);
4758                 } else {
4759                         len = name_len;
4760                 }
4761                 (*size) += name_len;
4762
4763                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4764                 filetype = btrfs_dir_type(node, di);
4765
4766                 if (key->type == BTRFS_DIR_ITEM_KEY &&
4767                     key->offset != btrfs_name_hash(namebuf, len)) {
4768                         err |= -EIO;
4769                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4770                                 root->objectid, key->objectid, key->offset,
4771                                 namebuf, len, filetype, key->offset,
4772                                 btrfs_name_hash(namebuf, len));
4773                 }
4774
4775                 btrfs_init_path(&path);
4776                 btrfs_dir_item_key_to_cpu(node, di, &location);
4777
4778                 /* Ignore related ROOT_ITEM check */
4779                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4780                         goto next;
4781
4782                 /* Check relative INODE_ITEM(existence/filetype) */
4783                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4784                 if (ret) {
4785                         err |= INODE_ITEM_MISSING;
4786                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4787                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4788                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4789                               key->offset, location.objectid, name_len,
4790                               namebuf, filetype);
4791                         goto next;
4792                 }
4793
4794                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4795                                     struct btrfs_inode_item);
4796                 mode = btrfs_inode_mode(path.nodes[0], ii);
4797
4798                 if (imode_to_type(mode) != filetype) {
4799                         err |= INODE_ITEM_MISMATCH;
4800                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4801                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4802                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4803                               key->offset, name_len, namebuf, filetype);
4804                 }
4805
4806                 /* Check relative INODE_REF/INODE_EXTREF */
4807                 location.type = BTRFS_INODE_REF_KEY;
4808                 location.offset = key->objectid;
4809                 ret = find_inode_ref(root, &location, namebuf, len,
4810                                        index, ext_ref);
4811                 err |= ret;
4812                 if (ret & INODE_REF_MISSING)
4813                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4814                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4815                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4816                               key->offset, name_len, namebuf, filetype);
4817
4818 next:
4819                 btrfs_release_path(&path);
4820                 len = sizeof(*di) + name_len + data_len;
4821                 di = (struct btrfs_dir_item *)((char *)di + len);
4822                 cur += len;
4823
4824                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4825                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4826                               root->objectid, key->objectid, key->offset);
4827                         break;
4828                 }
4829         }
4830
4831         return err;
4832 }
4833
4834 /*
4835  * Check file extent datasum/hole, update the size of the file extents,
4836  * check and update the last offset of the file extent.
4837  *
4838  * @root:       the root of fs/file tree.
4839  * @fkey:       the key of the file extent.
4840  * @nodatasum:  INODE_NODATASUM feature.
4841  * @size:       the sum of all EXTENT_DATA items size for this inode.
4842  * @end:        the offset of the last extent.
4843  *
4844  * Return 0 if no error occurred.
4845  */
4846 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4847                              struct extent_buffer *node, int slot,
4848                              unsigned int nodatasum, u64 *size, u64 *end)
4849 {
4850         struct btrfs_file_extent_item *fi;
4851         u64 disk_bytenr;
4852         u64 disk_num_bytes;
4853         u64 extent_num_bytes;
4854         u64 extent_offset;
4855         u64 csum_found;         /* In byte size, sectorsize aligned */
4856         u64 search_start;       /* Logical range start we search for csum */
4857         u64 search_len;         /* Logical range len we search for csum */
4858         unsigned int extent_type;
4859         unsigned int is_hole;
4860         int compressed = 0;
4861         int ret;
4862         int err = 0;
4863
4864         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4865
4866         /* Check inline extent */
4867         extent_type = btrfs_file_extent_type(node, fi);
4868         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4869                 struct btrfs_item *e = btrfs_item_nr(slot);
4870                 u32 item_inline_len;
4871
4872                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4873                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4874                 compressed = btrfs_file_extent_compression(node, fi);
4875                 if (extent_num_bytes == 0) {
4876                         error(
4877                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4878                                 root->objectid, fkey->objectid, fkey->offset);
4879                         err |= FILE_EXTENT_ERROR;
4880                 }
4881                 if (!compressed && extent_num_bytes != item_inline_len) {
4882                         error(
4883                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4884                                 root->objectid, fkey->objectid, fkey->offset,
4885                                 extent_num_bytes, item_inline_len);
4886                         err |= FILE_EXTENT_ERROR;
4887                 }
4888                 *end += extent_num_bytes;
4889                 *size += extent_num_bytes;
4890                 return err;
4891         }
4892
4893         /* Check extent type */
4894         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4895                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4896                 err |= FILE_EXTENT_ERROR;
4897                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4898                       root->objectid, fkey->objectid, fkey->offset);
4899                 return err;
4900         }
4901
4902         /* Check REG_EXTENT/PREALLOC_EXTENT */
4903         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4904         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4905         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4906         extent_offset = btrfs_file_extent_offset(node, fi);
4907         compressed = btrfs_file_extent_compression(node, fi);
4908         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4909
4910         /*
4911          * Check EXTENT_DATA csum
4912          *
4913          * For plain (uncompressed) extent, we should only check the range
4914          * we're referring to, as it's possible that part of prealloc extent
4915          * has been written, and has csum:
4916          *
4917          * |<--- Original large preallocated extent A ---->|
4918          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4919          *      No csum                         Has csum
4920          *
4921          * For compressed extent, we should check the whole range.
4922          */
4923         if (!compressed) {
4924                 search_start = disk_bytenr + extent_offset;
4925                 search_len = extent_num_bytes;
4926         } else {
4927                 search_start = disk_bytenr;
4928                 search_len = disk_num_bytes;
4929         }
4930         ret = count_csum_range(root, search_start, search_len, &csum_found);
4931         if (csum_found > 0 && nodatasum) {
4932                 err |= ODD_CSUM_ITEM;
4933                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4934                       root->objectid, fkey->objectid, fkey->offset);
4935         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4936                    !is_hole && (ret < 0 || csum_found < search_len)) {
4937                 err |= CSUM_ITEM_MISSING;
4938                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4939                       root->objectid, fkey->objectid, fkey->offset,
4940                       csum_found, search_len);
4941         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4942                 err |= ODD_CSUM_ITEM;
4943                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4944                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4945         }
4946
4947         /* Check EXTENT_DATA hole */
4948         if (!no_holes && *end != fkey->offset) {
4949                 err |= FILE_EXTENT_ERROR;
4950                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4951                       root->objectid, fkey->objectid, fkey->offset);
4952         }
4953
4954         *end += extent_num_bytes;
4955         if (!is_hole)
4956                 *size += extent_num_bytes;
4957
4958         return err;
4959 }
4960
4961 /*
4962  * Check INODE_ITEM and related ITEMs (the same inode number)
4963  * 1. check link count
4964  * 2. check inode ref/extref
4965  * 3. check dir item/index
4966  *
4967  * @ext_ref:    the EXTENDED_IREF feature
4968  *
4969  * Return 0 if no error occurred.
4970  * Return >0 for error or hit the traversal is done(by error bitmap)
4971  */
4972 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4973                             unsigned int ext_ref)
4974 {
4975         struct extent_buffer *node;
4976         struct btrfs_inode_item *ii;
4977         struct btrfs_key key;
4978         u64 inode_id;
4979         u32 mode;
4980         u64 nlink;
4981         u64 nbytes;
4982         u64 isize;
4983         u64 size = 0;
4984         u64 refs = 0;
4985         u64 extent_end = 0;
4986         u64 extent_size = 0;
4987         unsigned int dir;
4988         unsigned int nodatasum;
4989         int slot;
4990         int ret;
4991         int err = 0;
4992
4993         node = path->nodes[0];
4994         slot = path->slots[0];
4995
4996         btrfs_item_key_to_cpu(node, &key, slot);
4997         inode_id = key.objectid;
4998
4999         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5000                 ret = btrfs_next_item(root, path);
5001                 if (ret > 0)
5002                         err |= LAST_ITEM;
5003                 return err;
5004         }
5005
5006         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5007         isize = btrfs_inode_size(node, ii);
5008         nbytes = btrfs_inode_nbytes(node, ii);
5009         mode = btrfs_inode_mode(node, ii);
5010         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5011         nlink = btrfs_inode_nlink(node, ii);
5012         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5013
5014         while (1) {
5015                 ret = btrfs_next_item(root, path);
5016                 if (ret < 0) {
5017                         /* out will fill 'err' rusing current statistics */
5018                         goto out;
5019                 } else if (ret > 0) {
5020                         err |= LAST_ITEM;
5021                         goto out;
5022                 }
5023
5024                 node = path->nodes[0];
5025                 slot = path->slots[0];
5026                 btrfs_item_key_to_cpu(node, &key, slot);
5027                 if (key.objectid != inode_id)
5028                         goto out;
5029
5030                 switch (key.type) {
5031                 case BTRFS_INODE_REF_KEY:
5032                         ret = check_inode_ref(root, &key, node, slot, &refs,
5033                                               mode);
5034                         err |= ret;
5035                         break;
5036                 case BTRFS_INODE_EXTREF_KEY:
5037                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5038                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5039                                         root->objectid, key.objectid,
5040                                         key.offset);
5041                         ret = check_inode_extref(root, &key, node, slot, &refs,
5042                                                  mode);
5043                         err |= ret;
5044                         break;
5045                 case BTRFS_DIR_ITEM_KEY:
5046                 case BTRFS_DIR_INDEX_KEY:
5047                         if (!dir) {
5048                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5049                                         root->objectid, inode_id,
5050                                         imode_to_type(mode), key.objectid,
5051                                         key.offset);
5052                         }
5053                         ret = check_dir_item(root, &key, node, slot, &size,
5054                                              ext_ref);
5055                         err |= ret;
5056                         break;
5057                 case BTRFS_EXTENT_DATA_KEY:
5058                         if (dir) {
5059                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5060                                         root->objectid, inode_id, key.objectid,
5061                                         key.offset);
5062                         }
5063                         ret = check_file_extent(root, &key, node, slot,
5064                                                 nodatasum, &extent_size,
5065                                                 &extent_end);
5066                         err |= ret;
5067                         break;
5068                 case BTRFS_XATTR_ITEM_KEY:
5069                         break;
5070                 default:
5071                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5072                               key.objectid, key.type, key.offset);
5073                 }
5074         }
5075
5076 out:
5077         /* verify INODE_ITEM nlink/isize/nbytes */
5078         if (dir) {
5079                 if (nlink != 1) {
5080                         err |= LINK_COUNT_ERROR;
5081                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5082                               root->objectid, inode_id, nlink);
5083                 }
5084
5085                 /*
5086                  * Just a warning, as dir inode nbytes is just an
5087                  * instructive value.
5088                  */
5089                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5090                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5091                                 root->objectid, inode_id,
5092                                 root->fs_info->nodesize);
5093                 }
5094
5095                 if (isize != size) {
5096                         err |= ISIZE_ERROR;
5097                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5098                               root->objectid, inode_id, isize, size);
5099                 }
5100         } else {
5101                 if (nlink != refs) {
5102                         err |= LINK_COUNT_ERROR;
5103                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5104                               root->objectid, inode_id, nlink, refs);
5105                 } else if (!nlink) {
5106                         err |= ORPHAN_ITEM;
5107                 }
5108
5109                 if (!nbytes && !no_holes && extent_end < isize) {
5110                         err |= NBYTES_ERROR;
5111                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5112                               root->objectid, inode_id, isize);
5113                 }
5114
5115                 if (nbytes != extent_size) {
5116                         err |= NBYTES_ERROR;
5117                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5118                               root->objectid, inode_id, nbytes, extent_size);
5119                 }
5120         }
5121
5122         return err;
5123 }
5124
5125 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5126 {
5127         struct btrfs_path path;
5128         struct btrfs_key key;
5129         int err = 0;
5130         int ret;
5131
5132         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5133         key.type = BTRFS_INODE_ITEM_KEY;
5134         key.offset = 0;
5135
5136         /* For root being dropped, we don't need to check first inode */
5137         if (btrfs_root_refs(&root->root_item) == 0 &&
5138             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5139             key.objectid)
5140                 return 0;
5141
5142         btrfs_init_path(&path);
5143
5144         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5145         if (ret < 0)
5146                 goto out;
5147         if (ret > 0) {
5148                 ret = 0;
5149                 err |= INODE_ITEM_MISSING;
5150                 error("first inode item of root %llu is missing",
5151                       root->objectid);
5152         }
5153
5154         err |= check_inode_item(root, &path, ext_ref);
5155         err &= ~LAST_ITEM;
5156         if (err && !ret)
5157                 ret = -EIO;
5158 out:
5159         btrfs_release_path(&path);
5160         return ret;
5161 }
5162
5163 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5164                                                 u64 parent, u64 root)
5165 {
5166         struct rb_node *node;
5167         struct tree_backref *back = NULL;
5168         struct tree_backref match = {
5169                 .node = {
5170                         .is_data = 0,
5171                 },
5172         };
5173
5174         if (parent) {
5175                 match.parent = parent;
5176                 match.node.full_backref = 1;
5177         } else {
5178                 match.root = root;
5179         }
5180
5181         node = rb_search(&rec->backref_tree, &match.node.node,
5182                          (rb_compare_keys)compare_extent_backref, NULL);
5183         if (node)
5184                 back = to_tree_backref(rb_node_to_extent_backref(node));
5185
5186         return back;
5187 }
5188
5189 static struct data_backref *find_data_backref(struct extent_record *rec,
5190                                                 u64 parent, u64 root,
5191                                                 u64 owner, u64 offset,
5192                                                 int found_ref,
5193                                                 u64 disk_bytenr, u64 bytes)
5194 {
5195         struct rb_node *node;
5196         struct data_backref *back = NULL;
5197         struct data_backref match = {
5198                 .node = {
5199                         .is_data = 1,
5200                 },
5201                 .owner = owner,
5202                 .offset = offset,
5203                 .bytes = bytes,
5204                 .found_ref = found_ref,
5205                 .disk_bytenr = disk_bytenr,
5206         };
5207
5208         if (parent) {
5209                 match.parent = parent;
5210                 match.node.full_backref = 1;
5211         } else {
5212                 match.root = root;
5213         }
5214
5215         node = rb_search(&rec->backref_tree, &match.node.node,
5216                          (rb_compare_keys)compare_extent_backref, NULL);
5217         if (node)
5218                 back = to_data_backref(rb_node_to_extent_backref(node));
5219
5220         return back;
5221 }
5222 /*
5223  * Iterate all item on the tree and call check_inode_item() to check.
5224  *
5225  * @root:       the root of the tree to be checked.
5226  * @ext_ref:    the EXTENDED_IREF feature
5227  *
5228  * Return 0 if no error found.
5229  * Return <0 for error.
5230  */
5231 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5232 {
5233         struct btrfs_path path;
5234         struct node_refs nrefs;
5235         struct btrfs_root_item *root_item = &root->root_item;
5236         int ret;
5237         int level;
5238         int err = 0;
5239
5240         /*
5241          * We need to manually check the first inode item(256)
5242          * As the following traversal function will only start from
5243          * the first inode item in the leaf, if inode item(256) is missing
5244          * we will just skip it forever.
5245          */
5246         ret = check_fs_first_inode(root, ext_ref);
5247         if (ret < 0)
5248                 return ret;
5249
5250         memset(&nrefs, 0, sizeof(nrefs));
5251         level = btrfs_header_level(root->node);
5252         btrfs_init_path(&path);
5253
5254         if (btrfs_root_refs(root_item) > 0 ||
5255             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5256                 path.nodes[level] = root->node;
5257                 path.slots[level] = 0;
5258                 extent_buffer_get(root->node);
5259         } else {
5260                 struct btrfs_key key;
5261
5262                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5263                 level = root_item->drop_level;
5264                 path.lowest_level = level;
5265                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5266                 if (ret < 0)
5267                         goto out;
5268                 ret = 0;
5269         }
5270
5271         while (1) {
5272                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5273                 err |= !!ret;
5274
5275                 /* if ret is negative, walk shall stop */
5276                 if (ret < 0) {
5277                         ret = err;
5278                         break;
5279                 }
5280
5281                 ret = walk_up_tree_v2(root, &path, &level);
5282                 if (ret != 0) {
5283                         /* Normal exit, reset ret to err */
5284                         ret = err;
5285                         break;
5286                 }
5287         }
5288
5289 out:
5290         btrfs_release_path(&path);
5291         return ret;
5292 }
5293
5294 /*
5295  * Find the relative ref for root_ref and root_backref.
5296  *
5297  * @root:       the root of the root tree.
5298  * @ref_key:    the key of the root ref.
5299  *
5300  * Return 0 if no error occurred.
5301  */
5302 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5303                           struct extent_buffer *node, int slot)
5304 {
5305         struct btrfs_path path;
5306         struct btrfs_key key;
5307         struct btrfs_root_ref *ref;
5308         struct btrfs_root_ref *backref;
5309         char ref_name[BTRFS_NAME_LEN] = {0};
5310         char backref_name[BTRFS_NAME_LEN] = {0};
5311         u64 ref_dirid;
5312         u64 ref_seq;
5313         u32 ref_namelen;
5314         u64 backref_dirid;
5315         u64 backref_seq;
5316         u32 backref_namelen;
5317         u32 len;
5318         int ret;
5319         int err = 0;
5320
5321         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5322         ref_dirid = btrfs_root_ref_dirid(node, ref);
5323         ref_seq = btrfs_root_ref_sequence(node, ref);
5324         ref_namelen = btrfs_root_ref_name_len(node, ref);
5325
5326         if (ref_namelen <= BTRFS_NAME_LEN) {
5327                 len = ref_namelen;
5328         } else {
5329                 len = BTRFS_NAME_LEN;
5330                 warning("%s[%llu %llu] ref_name too long",
5331                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5332                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5333                         ref_key->offset);
5334         }
5335         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5336
5337         /* Find relative root_ref */
5338         key.objectid = ref_key->offset;
5339         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5340         key.offset = ref_key->objectid;
5341
5342         btrfs_init_path(&path);
5343         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5344         if (ret) {
5345                 err |= ROOT_REF_MISSING;
5346                 error("%s[%llu %llu] couldn't find relative ref",
5347                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5348                       "ROOT_REF" : "ROOT_BACKREF",
5349                       ref_key->objectid, ref_key->offset);
5350                 goto out;
5351         }
5352
5353         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5354                                  struct btrfs_root_ref);
5355         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5356         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5357         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5358
5359         if (backref_namelen <= BTRFS_NAME_LEN) {
5360                 len = backref_namelen;
5361         } else {
5362                 len = BTRFS_NAME_LEN;
5363                 warning("%s[%llu %llu] ref_name too long",
5364                         key.type == BTRFS_ROOT_REF_KEY ?
5365                         "ROOT_REF" : "ROOT_BACKREF",
5366                         key.objectid, key.offset);
5367         }
5368         read_extent_buffer(path.nodes[0], backref_name,
5369                            (unsigned long)(backref + 1), len);
5370
5371         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5372             ref_namelen != backref_namelen ||
5373             strncmp(ref_name, backref_name, len)) {
5374                 err |= ROOT_REF_MISMATCH;
5375                 error("%s[%llu %llu] mismatch relative ref",
5376                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5377                       "ROOT_REF" : "ROOT_BACKREF",
5378                       ref_key->objectid, ref_key->offset);
5379         }
5380 out:
5381         btrfs_release_path(&path);
5382         return err;
5383 }
5384
5385 /*
5386  * Check all fs/file tree in low_memory mode.
5387  *
5388  * 1. for fs tree root item, call check_fs_root_v2()
5389  * 2. for fs tree root ref/backref, call check_root_ref()
5390  *
5391  * Return 0 if no error occurred.
5392  */
5393 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5394 {
5395         struct btrfs_root *tree_root = fs_info->tree_root;
5396         struct btrfs_root *cur_root = NULL;
5397         struct btrfs_path path;
5398         struct btrfs_key key;
5399         struct extent_buffer *node;
5400         unsigned int ext_ref;
5401         int slot;
5402         int ret;
5403         int err = 0;
5404
5405         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5406
5407         btrfs_init_path(&path);
5408         key.objectid = BTRFS_FS_TREE_OBJECTID;
5409         key.offset = 0;
5410         key.type = BTRFS_ROOT_ITEM_KEY;
5411
5412         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5413         if (ret < 0) {
5414                 err = ret;
5415                 goto out;
5416         } else if (ret > 0) {
5417                 err = -ENOENT;
5418                 goto out;
5419         }
5420
5421         while (1) {
5422                 node = path.nodes[0];
5423                 slot = path.slots[0];
5424                 btrfs_item_key_to_cpu(node, &key, slot);
5425                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5426                         goto out;
5427                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5428                     fs_root_objectid(key.objectid)) {
5429                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5430                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5431                                                                        &key);
5432                         } else {
5433                                 key.offset = (u64)-1;
5434                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5435                         }
5436
5437                         if (IS_ERR(cur_root)) {
5438                                 error("Fail to read fs/subvol tree: %lld",
5439                                       key.objectid);
5440                                 err = -EIO;
5441                                 goto next;
5442                         }
5443
5444                         ret = check_fs_root_v2(cur_root, ext_ref);
5445                         err |= ret;
5446
5447                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5448                                 btrfs_free_fs_root(cur_root);
5449                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5450                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5451                         ret = check_root_ref(tree_root, &key, node, slot);
5452                         err |= ret;
5453                 }
5454 next:
5455                 ret = btrfs_next_item(tree_root, &path);
5456                 if (ret > 0)
5457                         goto out;
5458                 if (ret < 0) {
5459                         err = ret;
5460                         goto out;
5461                 }
5462         }
5463
5464 out:
5465         btrfs_release_path(&path);
5466         return err;
5467 }
5468
5469 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5470                           struct cache_tree *root_cache)
5471 {
5472         int ret;
5473
5474         if (!ctx.progress_enabled)
5475                 fprintf(stderr, "checking fs roots\n");
5476         if (check_mode == CHECK_MODE_LOWMEM)
5477                 ret = check_fs_roots_v2(fs_info);
5478         else
5479                 ret = check_fs_roots(fs_info, root_cache);
5480
5481         return ret;
5482 }
5483
5484 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5485 {
5486         struct list_head *cur = rec->backrefs.next;
5487         struct extent_backref *back;
5488         struct tree_backref *tback;
5489         struct data_backref *dback;
5490         u64 found = 0;
5491         int err = 0;
5492
5493         while(cur != &rec->backrefs) {
5494                 back = to_extent_backref(cur);
5495                 cur = cur->next;
5496                 if (!back->found_extent_tree) {
5497                         err = 1;
5498                         if (!print_errs)
5499                                 goto out;
5500                         if (back->is_data) {
5501                                 dback = to_data_backref(back);
5502                                 fprintf(stderr, "Data backref %llu %s %llu"
5503                                         " owner %llu offset %llu num_refs %lu"
5504                                         " not found in extent tree\n",
5505                                         (unsigned long long)rec->start,
5506                                         back->full_backref ?
5507                                         "parent" : "root",
5508                                         back->full_backref ?
5509                                         (unsigned long long)dback->parent:
5510                                         (unsigned long long)dback->root,
5511                                         (unsigned long long)dback->owner,
5512                                         (unsigned long long)dback->offset,
5513                                         (unsigned long)dback->num_refs);
5514                         } else {
5515                                 tback = to_tree_backref(back);
5516                                 fprintf(stderr, "Tree backref %llu parent %llu"
5517                                         " root %llu not found in extent tree\n",
5518                                         (unsigned long long)rec->start,
5519                                         (unsigned long long)tback->parent,
5520                                         (unsigned long long)tback->root);
5521                         }
5522                 }
5523                 if (!back->is_data && !back->found_ref) {
5524                         err = 1;
5525                         if (!print_errs)
5526                                 goto out;
5527                         tback = to_tree_backref(back);
5528                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5529                                 (unsigned long long)rec->start,
5530                                 back->full_backref ? "parent" : "root",
5531                                 back->full_backref ?
5532                                 (unsigned long long)tback->parent :
5533                                 (unsigned long long)tback->root, back);
5534                 }
5535                 if (back->is_data) {
5536                         dback = to_data_backref(back);
5537                         if (dback->found_ref != dback->num_refs) {
5538                                 err = 1;
5539                                 if (!print_errs)
5540                                         goto out;
5541                                 fprintf(stderr, "Incorrect local backref count"
5542                                         " on %llu %s %llu owner %llu"
5543                                         " offset %llu found %u wanted %u back %p\n",
5544                                         (unsigned long long)rec->start,
5545                                         back->full_backref ?
5546                                         "parent" : "root",
5547                                         back->full_backref ?
5548                                         (unsigned long long)dback->parent:
5549                                         (unsigned long long)dback->root,
5550                                         (unsigned long long)dback->owner,
5551                                         (unsigned long long)dback->offset,
5552                                         dback->found_ref, dback->num_refs, back);
5553                         }
5554                         if (dback->disk_bytenr != rec->start) {
5555                                 err = 1;
5556                                 if (!print_errs)
5557                                         goto out;
5558                                 fprintf(stderr, "Backref disk bytenr does not"
5559                                         " match extent record, bytenr=%llu, "
5560                                         "ref bytenr=%llu\n",
5561                                         (unsigned long long)rec->start,
5562                                         (unsigned long long)dback->disk_bytenr);
5563                         }
5564
5565                         if (dback->bytes != rec->nr) {
5566                                 err = 1;
5567                                 if (!print_errs)
5568                                         goto out;
5569                                 fprintf(stderr, "Backref bytes do not match "
5570                                         "extent backref, bytenr=%llu, ref "
5571                                         "bytes=%llu, backref bytes=%llu\n",
5572                                         (unsigned long long)rec->start,
5573                                         (unsigned long long)rec->nr,
5574                                         (unsigned long long)dback->bytes);
5575                         }
5576                 }
5577                 if (!back->is_data) {
5578                         found += 1;
5579                 } else {
5580                         dback = to_data_backref(back);
5581                         found += dback->found_ref;
5582                 }
5583         }
5584         if (found != rec->refs) {
5585                 err = 1;
5586                 if (!print_errs)
5587                         goto out;
5588                 fprintf(stderr, "Incorrect global backref count "
5589                         "on %llu found %llu wanted %llu\n",
5590                         (unsigned long long)rec->start,
5591                         (unsigned long long)found,
5592                         (unsigned long long)rec->refs);
5593         }
5594 out:
5595         return err;
5596 }
5597
5598 static int free_all_extent_backrefs(struct extent_record *rec)
5599 {
5600         struct extent_backref *back;
5601         struct list_head *cur;
5602         while (!list_empty(&rec->backrefs)) {
5603                 cur = rec->backrefs.next;
5604                 back = to_extent_backref(cur);
5605                 list_del(cur);
5606                 free(back);
5607         }
5608         return 0;
5609 }
5610
5611 static void free_extent_record_cache(struct cache_tree *extent_cache)
5612 {
5613         struct cache_extent *cache;
5614         struct extent_record *rec;
5615
5616         while (1) {
5617                 cache = first_cache_extent(extent_cache);
5618                 if (!cache)
5619                         break;
5620                 rec = container_of(cache, struct extent_record, cache);
5621                 remove_cache_extent(extent_cache, cache);
5622                 free_all_extent_backrefs(rec);
5623                 free(rec);
5624         }
5625 }
5626
5627 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5628                                  struct extent_record *rec)
5629 {
5630         if (rec->content_checked && rec->owner_ref_checked &&
5631             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5632             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5633             !rec->bad_full_backref && !rec->crossing_stripes &&
5634             !rec->wrong_chunk_type) {
5635                 remove_cache_extent(extent_cache, &rec->cache);
5636                 free_all_extent_backrefs(rec);
5637                 list_del_init(&rec->list);
5638                 free(rec);
5639         }
5640         return 0;
5641 }
5642
5643 static int check_owner_ref(struct btrfs_root *root,
5644                             struct extent_record *rec,
5645                             struct extent_buffer *buf)
5646 {
5647         struct extent_backref *node;
5648         struct tree_backref *back;
5649         struct btrfs_root *ref_root;
5650         struct btrfs_key key;
5651         struct btrfs_path path;
5652         struct extent_buffer *parent;
5653         int level;
5654         int found = 0;
5655         int ret;
5656
5657         list_for_each_entry(node, &rec->backrefs, list) {
5658                 if (node->is_data)
5659                         continue;
5660                 if (!node->found_ref)
5661                         continue;
5662                 if (node->full_backref)
5663                         continue;
5664                 back = to_tree_backref(node);
5665                 if (btrfs_header_owner(buf) == back->root)
5666                         return 0;
5667         }
5668         BUG_ON(rec->is_root);
5669
5670         /* try to find the block by search corresponding fs tree */
5671         key.objectid = btrfs_header_owner(buf);
5672         key.type = BTRFS_ROOT_ITEM_KEY;
5673         key.offset = (u64)-1;
5674
5675         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5676         if (IS_ERR(ref_root))
5677                 return 1;
5678
5679         level = btrfs_header_level(buf);
5680         if (level == 0)
5681                 btrfs_item_key_to_cpu(buf, &key, 0);
5682         else
5683                 btrfs_node_key_to_cpu(buf, &key, 0);
5684
5685         btrfs_init_path(&path);
5686         path.lowest_level = level + 1;
5687         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5688         if (ret < 0)
5689                 return 0;
5690
5691         parent = path.nodes[level + 1];
5692         if (parent && buf->start == btrfs_node_blockptr(parent,
5693                                                         path.slots[level + 1]))
5694                 found = 1;
5695
5696         btrfs_release_path(&path);
5697         return found ? 0 : 1;
5698 }
5699
5700 static int is_extent_tree_record(struct extent_record *rec)
5701 {
5702         struct list_head *cur = rec->backrefs.next;
5703         struct extent_backref *node;
5704         struct tree_backref *back;
5705         int is_extent = 0;
5706
5707         while(cur != &rec->backrefs) {
5708                 node = to_extent_backref(cur);
5709                 cur = cur->next;
5710                 if (node->is_data)
5711                         return 0;
5712                 back = to_tree_backref(node);
5713                 if (node->full_backref)
5714                         return 0;
5715                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5716                         is_extent = 1;
5717         }
5718         return is_extent;
5719 }
5720
5721
5722 static int record_bad_block_io(struct btrfs_fs_info *info,
5723                                struct cache_tree *extent_cache,
5724                                u64 start, u64 len)
5725 {
5726         struct extent_record *rec;
5727         struct cache_extent *cache;
5728         struct btrfs_key key;
5729
5730         cache = lookup_cache_extent(extent_cache, start, len);
5731         if (!cache)
5732                 return 0;
5733
5734         rec = container_of(cache, struct extent_record, cache);
5735         if (!is_extent_tree_record(rec))
5736                 return 0;
5737
5738         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5739         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5740 }
5741
5742 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5743                        struct extent_buffer *buf, int slot)
5744 {
5745         if (btrfs_header_level(buf)) {
5746                 struct btrfs_key_ptr ptr1, ptr2;
5747
5748                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5749                                    sizeof(struct btrfs_key_ptr));
5750                 read_extent_buffer(buf, &ptr2,
5751                                    btrfs_node_key_ptr_offset(slot + 1),
5752                                    sizeof(struct btrfs_key_ptr));
5753                 write_extent_buffer(buf, &ptr1,
5754                                     btrfs_node_key_ptr_offset(slot + 1),
5755                                     sizeof(struct btrfs_key_ptr));
5756                 write_extent_buffer(buf, &ptr2,
5757                                     btrfs_node_key_ptr_offset(slot),
5758                                     sizeof(struct btrfs_key_ptr));
5759                 if (slot == 0) {
5760                         struct btrfs_disk_key key;
5761                         btrfs_node_key(buf, &key, 0);
5762                         btrfs_fixup_low_keys(root, path, &key,
5763                                              btrfs_header_level(buf) + 1);
5764                 }
5765         } else {
5766                 struct btrfs_item *item1, *item2;
5767                 struct btrfs_key k1, k2;
5768                 char *item1_data, *item2_data;
5769                 u32 item1_offset, item2_offset, item1_size, item2_size;
5770
5771                 item1 = btrfs_item_nr(slot);
5772                 item2 = btrfs_item_nr(slot + 1);
5773                 btrfs_item_key_to_cpu(buf, &k1, slot);
5774                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5775                 item1_offset = btrfs_item_offset(buf, item1);
5776                 item2_offset = btrfs_item_offset(buf, item2);
5777                 item1_size = btrfs_item_size(buf, item1);
5778                 item2_size = btrfs_item_size(buf, item2);
5779
5780                 item1_data = malloc(item1_size);
5781                 if (!item1_data)
5782                         return -ENOMEM;
5783                 item2_data = malloc(item2_size);
5784                 if (!item2_data) {
5785                         free(item1_data);
5786                         return -ENOMEM;
5787                 }
5788
5789                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5790                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5791
5792                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5793                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5794                 free(item1_data);
5795                 free(item2_data);
5796
5797                 btrfs_set_item_offset(buf, item1, item2_offset);
5798                 btrfs_set_item_offset(buf, item2, item1_offset);
5799                 btrfs_set_item_size(buf, item1, item2_size);
5800                 btrfs_set_item_size(buf, item2, item1_size);
5801
5802                 path->slots[0] = slot;
5803                 btrfs_set_item_key_unsafe(root, path, &k2);
5804                 path->slots[0] = slot + 1;
5805                 btrfs_set_item_key_unsafe(root, path, &k1);
5806         }
5807         return 0;
5808 }
5809
5810 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5811 {
5812         struct extent_buffer *buf;
5813         struct btrfs_key k1, k2;
5814         int i;
5815         int level = path->lowest_level;
5816         int ret = -EIO;
5817
5818         buf = path->nodes[level];
5819         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5820                 if (level) {
5821                         btrfs_node_key_to_cpu(buf, &k1, i);
5822                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5823                 } else {
5824                         btrfs_item_key_to_cpu(buf, &k1, i);
5825                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5826                 }
5827                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5828                         continue;
5829                 ret = swap_values(root, path, buf, i);
5830                 if (ret)
5831                         break;
5832                 btrfs_mark_buffer_dirty(buf);
5833                 i = 0;
5834         }
5835         return ret;
5836 }
5837
5838 static int delete_bogus_item(struct btrfs_root *root,
5839                              struct btrfs_path *path,
5840                              struct extent_buffer *buf, int slot)
5841 {
5842         struct btrfs_key key;
5843         int nritems = btrfs_header_nritems(buf);
5844
5845         btrfs_item_key_to_cpu(buf, &key, slot);
5846
5847         /* These are all the keys we can deal with missing. */
5848         if (key.type != BTRFS_DIR_INDEX_KEY &&
5849             key.type != BTRFS_EXTENT_ITEM_KEY &&
5850             key.type != BTRFS_METADATA_ITEM_KEY &&
5851             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5852             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5853                 return -1;
5854
5855         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5856                (unsigned long long)key.objectid, key.type,
5857                (unsigned long long)key.offset, slot, buf->start);
5858         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5859                               btrfs_item_nr_offset(slot + 1),
5860                               sizeof(struct btrfs_item) *
5861                               (nritems - slot - 1));
5862         btrfs_set_header_nritems(buf, nritems - 1);
5863         if (slot == 0) {
5864                 struct btrfs_disk_key disk_key;
5865
5866                 btrfs_item_key(buf, &disk_key, 0);
5867                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5868         }
5869         btrfs_mark_buffer_dirty(buf);
5870         return 0;
5871 }
5872
5873 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5874 {
5875         struct extent_buffer *buf;
5876         int i;
5877         int ret = 0;
5878
5879         /* We should only get this for leaves */
5880         BUG_ON(path->lowest_level);
5881         buf = path->nodes[0];
5882 again:
5883         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5884                 unsigned int shift = 0, offset;
5885
5886                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5887                     BTRFS_LEAF_DATA_SIZE(root)) {
5888                         if (btrfs_item_end_nr(buf, i) >
5889                             BTRFS_LEAF_DATA_SIZE(root)) {
5890                                 ret = delete_bogus_item(root, path, buf, i);
5891                                 if (!ret)
5892                                         goto again;
5893                                 fprintf(stderr, "item is off the end of the "
5894                                         "leaf, can't fix\n");
5895                                 ret = -EIO;
5896                                 break;
5897                         }
5898                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5899                                 btrfs_item_end_nr(buf, i);
5900                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5901                            btrfs_item_offset_nr(buf, i - 1)) {
5902                         if (btrfs_item_end_nr(buf, i) >
5903                             btrfs_item_offset_nr(buf, i - 1)) {
5904                                 ret = delete_bogus_item(root, path, buf, i);
5905                                 if (!ret)
5906                                         goto again;
5907                                 fprintf(stderr, "items overlap, can't fix\n");
5908                                 ret = -EIO;
5909                                 break;
5910                         }
5911                         shift = btrfs_item_offset_nr(buf, i - 1) -
5912                                 btrfs_item_end_nr(buf, i);
5913                 }
5914                 if (!shift)
5915                         continue;
5916
5917                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5918                        i, shift, (unsigned long long)buf->start);
5919                 offset = btrfs_item_offset_nr(buf, i);
5920                 memmove_extent_buffer(buf,
5921                                       btrfs_leaf_data(buf) + offset + shift,
5922                                       btrfs_leaf_data(buf) + offset,
5923                                       btrfs_item_size_nr(buf, i));
5924                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5925                                       offset + shift);
5926                 btrfs_mark_buffer_dirty(buf);
5927         }
5928
5929         /*
5930          * We may have moved things, in which case we want to exit so we don't
5931          * write those changes out.  Once we have proper abort functionality in
5932          * progs this can be changed to something nicer.
5933          */
5934         BUG_ON(ret);
5935         return ret;
5936 }
5937
5938 /*
5939  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5940  * then just return -EIO.
5941  */
5942 static int try_to_fix_bad_block(struct btrfs_root *root,
5943                                 struct extent_buffer *buf,
5944                                 enum btrfs_tree_block_status status)
5945 {
5946         struct btrfs_trans_handle *trans;
5947         struct ulist *roots;
5948         struct ulist_node *node;
5949         struct btrfs_root *search_root;
5950         struct btrfs_path path;
5951         struct ulist_iterator iter;
5952         struct btrfs_key root_key, key;
5953         int ret;
5954
5955         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5956             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5957                 return -EIO;
5958
5959         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5960         if (ret)
5961                 return -EIO;
5962
5963         btrfs_init_path(&path);
5964         ULIST_ITER_INIT(&iter);
5965         while ((node = ulist_next(roots, &iter))) {
5966                 root_key.objectid = node->val;
5967                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5968                 root_key.offset = (u64)-1;
5969
5970                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5971                 if (IS_ERR(root)) {
5972                         ret = -EIO;
5973                         break;
5974                 }
5975
5976
5977                 trans = btrfs_start_transaction(search_root, 0);
5978                 if (IS_ERR(trans)) {
5979                         ret = PTR_ERR(trans);
5980                         break;
5981                 }
5982
5983                 path.lowest_level = btrfs_header_level(buf);
5984                 path.skip_check_block = 1;
5985                 if (path.lowest_level)
5986                         btrfs_node_key_to_cpu(buf, &key, 0);
5987                 else
5988                         btrfs_item_key_to_cpu(buf, &key, 0);
5989                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5990                 if (ret) {
5991                         ret = -EIO;
5992                         btrfs_commit_transaction(trans, search_root);
5993                         break;
5994                 }
5995                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5996                         ret = fix_key_order(search_root, &path);
5997                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5998                         ret = fix_item_offset(search_root, &path);
5999                 if (ret) {
6000                         btrfs_commit_transaction(trans, search_root);
6001                         break;
6002                 }
6003                 btrfs_release_path(&path);
6004                 btrfs_commit_transaction(trans, search_root);
6005         }
6006         ulist_free(roots);
6007         btrfs_release_path(&path);
6008         return ret;
6009 }
6010
6011 static int check_block(struct btrfs_root *root,
6012                        struct cache_tree *extent_cache,
6013                        struct extent_buffer *buf, u64 flags)
6014 {
6015         struct extent_record *rec;
6016         struct cache_extent *cache;
6017         struct btrfs_key key;
6018         enum btrfs_tree_block_status status;
6019         int ret = 0;
6020         int level;
6021
6022         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6023         if (!cache)
6024                 return 1;
6025         rec = container_of(cache, struct extent_record, cache);
6026         rec->generation = btrfs_header_generation(buf);
6027
6028         level = btrfs_header_level(buf);
6029         if (btrfs_header_nritems(buf) > 0) {
6030
6031                 if (level == 0)
6032                         btrfs_item_key_to_cpu(buf, &key, 0);
6033                 else
6034                         btrfs_node_key_to_cpu(buf, &key, 0);
6035
6036                 rec->info_objectid = key.objectid;
6037         }
6038         rec->info_level = level;
6039
6040         if (btrfs_is_leaf(buf))
6041                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6042         else
6043                 status = btrfs_check_node(root, &rec->parent_key, buf);
6044
6045         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6046                 if (repair)
6047                         status = try_to_fix_bad_block(root, buf, status);
6048                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6049                         ret = -EIO;
6050                         fprintf(stderr, "bad block %llu\n",
6051                                 (unsigned long long)buf->start);
6052                 } else {
6053                         /*
6054                          * Signal to callers we need to start the scan over
6055                          * again since we'll have cowed blocks.
6056                          */
6057                         ret = -EAGAIN;
6058                 }
6059         } else {
6060                 rec->content_checked = 1;
6061                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6062                         rec->owner_ref_checked = 1;
6063                 else {
6064                         ret = check_owner_ref(root, rec, buf);
6065                         if (!ret)
6066                                 rec->owner_ref_checked = 1;
6067                 }
6068         }
6069         if (!ret)
6070                 maybe_free_extent_rec(extent_cache, rec);
6071         return ret;
6072 }
6073
#if 0
/*
 * Disabled lookup helper: find the tree backref of @rec that matches either
 * @parent (full backrefs, when @parent is non-zero) or @root (non-full
 * backrefs, when @parent is zero).
 *
 * Returns the matching backref or NULL.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct extent_backref *node;
	struct tree_backref *back;

	list_for_each_entry(node, &rec->backrefs, list) {
		if (node->is_data)
			continue;

		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
6103
6104 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6105                                                 u64 parent, u64 root)
6106 {
6107         struct tree_backref *ref = malloc(sizeof(*ref));
6108
6109         if (!ref)
6110                 return NULL;
6111         memset(&ref->node, 0, sizeof(ref->node));
6112         if (parent > 0) {
6113                 ref->parent = parent;
6114                 ref->node.full_backref = 1;
6115         } else {
6116                 ref->root = root;
6117                 ref->node.full_backref = 0;
6118         }
6119         list_add_tail(&ref->node.list, &rec->backrefs);
6120
6121         return ref;
6122 }
6123
#if 0
/*
 * Compiled-out linear lookup of a data backref on @rec's backref list.
 *
 * With a non-zero @parent only full backrefs are considered and matched on
 * their parent.  Otherwise only non-full backrefs are considered and matched
 * on (@root, @owner, @offset); when both the caller (@found_ref) and the
 * candidate have found a ref, the candidate must additionally agree on
 * @bytes and @disk_bytenr.
 *
 * Returns the first matching backref, or NULL if none matches.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *pos;
        struct list_head *next;

        for (pos = rec->backrefs.next; pos != &rec->backrefs; pos = next) {
                struct extent_backref *node = to_extent_backref(pos);
                struct data_backref *back;

                next = pos->next;
                if (!node->is_data)
                        continue;
                back = to_data_backref(node);

                if (parent > 0) {
                        if (node->full_backref && parent == back->parent)
                                return back;
                        continue;
                }

                if (node->full_backref)
                        continue;
                if (back->root != root || back->owner != owner ||
                    back->offset != offset)
                        continue;

                /* Same key, but both sides saw a ref to a different extent */
                if (found_ref && node->found_ref &&
                    (back->bytes != bytes ||
                     back->disk_bytenr != disk_bytenr))
                        continue;
                return back;
        }
        return NULL;
}
#endif
6162
6163 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6164                                                 u64 parent, u64 root,
6165                                                 u64 owner, u64 offset,
6166                                                 u64 max_size)
6167 {
6168         struct data_backref *ref = malloc(sizeof(*ref));
6169
6170         if (!ref)
6171                 return NULL;
6172         memset(&ref->node, 0, sizeof(ref->node));
6173         ref->node.is_data = 1;
6174
6175         if (parent > 0) {
6176                 ref->parent = parent;
6177                 ref->owner = 0;
6178                 ref->offset = 0;
6179                 ref->node.full_backref = 1;
6180         } else {
6181                 ref->root = root;
6182                 ref->owner = owner;
6183                 ref->offset = offset;
6184                 ref->node.full_backref = 0;
6185         }
6186         ref->bytes = max_size;
6187         ref->found_ref = 0;
6188         ref->num_refs = 0;
6189         list_add_tail(&ref->node.list, &rec->backrefs);
6190         if (max_size > rec->max_size)
6191                 rec->max_size = max_size;
6192         return ref;
6193 }
6194
6195 /* Check if the type of extent matches with its chunk */
6196 static void check_extent_type(struct extent_record *rec)
6197 {
6198         struct btrfs_block_group_cache *bg_cache;
6199
6200         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6201         if (!bg_cache)
6202                 return;
6203
6204         /* data extent, check chunk directly*/
6205         if (!rec->metadata) {
6206                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6207                         rec->wrong_chunk_type = 1;
6208                 return;
6209         }
6210
6211         /* metadata extent, check the obvious case first */
6212         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6213                                  BTRFS_BLOCK_GROUP_METADATA))) {
6214                 rec->wrong_chunk_type = 1;
6215                 return;
6216         }
6217
6218         /*
6219          * Check SYSTEM extent, as it's also marked as metadata, we can only
6220          * make sure it's a SYSTEM extent by its backref
6221          */
6222         if (!list_empty(&rec->backrefs)) {
6223                 struct extent_backref *node;
6224                 struct tree_backref *tback;
6225                 u64 bg_type;
6226
6227                 node = to_extent_backref(rec->backrefs.next);
6228                 if (node->is_data) {
6229                         /* tree block shouldn't have data backref */
6230                         rec->wrong_chunk_type = 1;
6231                         return;
6232                 }
6233                 tback = container_of(node, struct tree_backref, node);
6234
6235                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6236                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6237                 else
6238                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6239                 if (!(bg_cache->flags & bg_type))
6240                         rec->wrong_chunk_type = 1;
6241         }
6242 }
6243
6244 /*
6245  * Allocate a new extent record, fill default values from @tmpl and insert int
6246  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6247  * the cache, otherwise it fails.
6248  */
6249 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6250                 struct extent_record *tmpl)
6251 {
6252         struct extent_record *rec;
6253         int ret = 0;
6254
6255         BUG_ON(tmpl->max_size == 0);
6256         rec = malloc(sizeof(*rec));
6257         if (!rec)
6258                 return -ENOMEM;
6259         rec->start = tmpl->start;
6260         rec->max_size = tmpl->max_size;
6261         rec->nr = max(tmpl->nr, tmpl->max_size);
6262         rec->found_rec = tmpl->found_rec;
6263         rec->content_checked = tmpl->content_checked;
6264         rec->owner_ref_checked = tmpl->owner_ref_checked;
6265         rec->num_duplicates = 0;
6266         rec->metadata = tmpl->metadata;
6267         rec->flag_block_full_backref = FLAG_UNSET;
6268         rec->bad_full_backref = 0;
6269         rec->crossing_stripes = 0;
6270         rec->wrong_chunk_type = 0;
6271         rec->is_root = tmpl->is_root;
6272         rec->refs = tmpl->refs;
6273         rec->extent_item_refs = tmpl->extent_item_refs;
6274         rec->parent_generation = tmpl->parent_generation;
6275         INIT_LIST_HEAD(&rec->backrefs);
6276         INIT_LIST_HEAD(&rec->dups);
6277         INIT_LIST_HEAD(&rec->list);
6278         rec->backref_tree = RB_ROOT;
6279         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6280         rec->cache.start = tmpl->start;
6281         rec->cache.size = tmpl->nr;
6282         ret = insert_cache_extent(extent_cache, &rec->cache);
6283         if (ret) {
6284                 free(rec);
6285                 return ret;
6286         }
6287         bytes_used += rec->nr;
6288
6289         if (tmpl->metadata)
6290                 rec->crossing_stripes = check_crossing_stripes(global_info,
6291                                 rec->start, global_info->nodesize);
6292         check_extent_type(rec);
6293         return ret;
6294 }
6295
6296 /*
6297  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6298  * some are hints:
6299  * - refs              - if found, increase refs
6300  * - is_root           - if found, set
6301  * - content_checked   - if found, set
6302  * - owner_ref_checked - if found, set
6303  *
6304  * If not found, create a new one, initialize and insert.
6305  */
6306 static int add_extent_rec(struct cache_tree *extent_cache,
6307                 struct extent_record *tmpl)
6308 {
6309         struct extent_record *rec;
6310         struct cache_extent *cache;
6311         int ret = 0;
6312         int dup = 0;
6313
6314         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6315         if (cache) {
6316                 rec = container_of(cache, struct extent_record, cache);
6317                 if (tmpl->refs)
6318                         rec->refs++;
6319                 if (rec->nr == 1)
6320                         rec->nr = max(tmpl->nr, tmpl->max_size);
6321
6322                 /*
6323                  * We need to make sure to reset nr to whatever the extent
6324                  * record says was the real size, this way we can compare it to
6325                  * the backrefs.
6326                  */
6327                 if (tmpl->found_rec) {
6328                         if (tmpl->start != rec->start || rec->found_rec) {
6329                                 struct extent_record *tmp;
6330
6331                                 dup = 1;
6332                                 if (list_empty(&rec->list))
6333                                         list_add_tail(&rec->list,
6334                                                       &duplicate_extents);
6335
6336                                 /*
6337                                  * We have to do this song and dance in case we
6338                                  * find an extent record that falls inside of
6339                                  * our current extent record but does not have
6340                                  * the same objectid.
6341                                  */
6342                                 tmp = malloc(sizeof(*tmp));
6343                                 if (!tmp)
6344                                         return -ENOMEM;
6345                                 tmp->start = tmpl->start;
6346                                 tmp->max_size = tmpl->max_size;
6347                                 tmp->nr = tmpl->nr;
6348                                 tmp->found_rec = 1;
6349                                 tmp->metadata = tmpl->metadata;
6350                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6351                                 INIT_LIST_HEAD(&tmp->list);
6352                                 list_add_tail(&tmp->list, &rec->dups);
6353                                 rec->num_duplicates++;
6354                         } else {
6355                                 rec->nr = tmpl->nr;
6356                                 rec->found_rec = 1;
6357                         }
6358                 }
6359
6360                 if (tmpl->extent_item_refs && !dup) {
6361                         if (rec->extent_item_refs) {
6362                                 fprintf(stderr, "block %llu rec "
6363                                         "extent_item_refs %llu, passed %llu\n",
6364                                         (unsigned long long)tmpl->start,
6365                                         (unsigned long long)
6366                                                         rec->extent_item_refs,
6367                                         (unsigned long long)tmpl->extent_item_refs);
6368                         }
6369                         rec->extent_item_refs = tmpl->extent_item_refs;
6370                 }
6371                 if (tmpl->is_root)
6372                         rec->is_root = 1;
6373                 if (tmpl->content_checked)
6374                         rec->content_checked = 1;
6375                 if (tmpl->owner_ref_checked)
6376                         rec->owner_ref_checked = 1;
6377                 memcpy(&rec->parent_key, &tmpl->parent_key,
6378                                 sizeof(tmpl->parent_key));
6379                 if (tmpl->parent_generation)
6380                         rec->parent_generation = tmpl->parent_generation;
6381                 if (rec->max_size < tmpl->max_size)
6382                         rec->max_size = tmpl->max_size;
6383
6384                 /*
6385                  * A metadata extent can't cross stripe_len boundary, otherwise
6386                  * kernel scrub won't be able to handle it.
6387                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6388                  * it.
6389                  */
6390                 if (tmpl->metadata)
6391                         rec->crossing_stripes = check_crossing_stripes(
6392                                         global_info, rec->start,
6393                                         global_info->nodesize);
6394                 check_extent_type(rec);
6395                 maybe_free_extent_rec(extent_cache, rec);
6396                 return ret;
6397         }
6398
6399         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6400
6401         return ret;
6402 }
6403
6404 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6405                             u64 parent, u64 root, int found_ref)
6406 {
6407         struct extent_record *rec;
6408         struct tree_backref *back;
6409         struct cache_extent *cache;
6410         int ret;
6411         bool insert = false;
6412
6413         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6414         if (!cache) {
6415                 struct extent_record tmpl;
6416
6417                 memset(&tmpl, 0, sizeof(tmpl));
6418                 tmpl.start = bytenr;
6419                 tmpl.nr = 1;
6420                 tmpl.metadata = 1;
6421                 tmpl.max_size = 1;
6422
6423                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6424                 if (ret)
6425                         return ret;
6426
6427                 /* really a bug in cache_extent implement now */
6428                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6429                 if (!cache)
6430                         return -ENOENT;
6431         }
6432
6433         rec = container_of(cache, struct extent_record, cache);
6434         if (rec->start != bytenr) {
6435                 /*
6436                  * Several cause, from unaligned bytenr to over lapping extents
6437                  */
6438                 return -EEXIST;
6439         }
6440
6441         back = find_tree_backref(rec, parent, root);
6442         if (!back) {
6443                 back = alloc_tree_backref(rec, parent, root);
6444                 if (!back)
6445                         return -ENOMEM;
6446                 insert = true;
6447         }
6448
6449         if (found_ref) {
6450                 if (back->node.found_ref) {
6451                         fprintf(stderr, "Extent back ref already exists "
6452                                 "for %llu parent %llu root %llu \n",
6453                                 (unsigned long long)bytenr,
6454                                 (unsigned long long)parent,
6455                                 (unsigned long long)root);
6456                 }
6457                 back->node.found_ref = 1;
6458         } else {
6459                 if (back->node.found_extent_tree) {
6460                         fprintf(stderr, "Extent back ref already exists "
6461                                 "for %llu parent %llu root %llu \n",
6462                                 (unsigned long long)bytenr,
6463                                 (unsigned long long)parent,
6464                                 (unsigned long long)root);
6465                 }
6466                 back->node.found_extent_tree = 1;
6467         }
6468         if (insert)
6469                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6470                         compare_extent_backref));
6471         check_extent_type(rec);
6472         maybe_free_extent_rec(extent_cache, rec);
6473         return 0;
6474 }
6475
6476 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6477                             u64 parent, u64 root, u64 owner, u64 offset,
6478                             u32 num_refs, int found_ref, u64 max_size)
6479 {
6480         struct extent_record *rec;
6481         struct data_backref *back;
6482         struct cache_extent *cache;
6483         int ret;
6484         bool insert = false;
6485
6486         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6487         if (!cache) {
6488                 struct extent_record tmpl;
6489
6490                 memset(&tmpl, 0, sizeof(tmpl));
6491                 tmpl.start = bytenr;
6492                 tmpl.nr = 1;
6493                 tmpl.max_size = max_size;
6494
6495                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6496                 if (ret)
6497                         return ret;
6498
6499                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6500                 if (!cache)
6501                         abort();
6502         }
6503
6504         rec = container_of(cache, struct extent_record, cache);
6505         if (rec->max_size < max_size)
6506                 rec->max_size = max_size;
6507
6508         /*
6509          * If found_ref is set then max_size is the real size and must match the
6510          * existing refs.  So if we have already found a ref then we need to
6511          * make sure that this ref matches the existing one, otherwise we need
6512          * to add a new backref so we can notice that the backrefs don't match
6513          * and we need to figure out who is telling the truth.  This is to
6514          * account for that awful fsync bug I introduced where we'd end up with
6515          * a btrfs_file_extent_item that would have its length include multiple
6516          * prealloc extents or point inside of a prealloc extent.
6517          */
6518         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6519                                  bytenr, max_size);
6520         if (!back) {
6521                 back = alloc_data_backref(rec, parent, root, owner, offset,
6522                                           max_size);
6523                 BUG_ON(!back);
6524                 insert = true;
6525         }
6526
6527         if (found_ref) {
6528                 BUG_ON(num_refs != 1);
6529                 if (back->node.found_ref)
6530                         BUG_ON(back->bytes != max_size);
6531                 back->node.found_ref = 1;
6532                 back->found_ref += 1;
6533                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6534                         back->bytes = max_size;
6535                         back->disk_bytenr = bytenr;
6536
6537                         /* Need to reinsert if not already in the tree */
6538                         if (!insert) {
6539                                 rb_erase(&back->node.node, &rec->backref_tree);
6540                                 insert = true;
6541                         }
6542                 }
6543                 rec->refs += 1;
6544                 rec->content_checked = 1;
6545                 rec->owner_ref_checked = 1;
6546         } else {
6547                 if (back->node.found_extent_tree) {
6548                         fprintf(stderr, "Extent back ref already exists "
6549                                 "for %llu parent %llu root %llu "
6550                                 "owner %llu offset %llu num_refs %lu\n",
6551                                 (unsigned long long)bytenr,
6552                                 (unsigned long long)parent,
6553                                 (unsigned long long)root,
6554                                 (unsigned long long)owner,
6555                                 (unsigned long long)offset,
6556                                 (unsigned long)num_refs);
6557                 }
6558                 back->num_refs = num_refs;
6559                 back->node.found_extent_tree = 1;
6560         }
6561         if (insert)
6562                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6563                         compare_extent_backref));
6564
6565         maybe_free_extent_rec(extent_cache, rec);
6566         return 0;
6567 }
6568
6569 static int add_pending(struct cache_tree *pending,
6570                        struct cache_tree *seen, u64 bytenr, u32 size)
6571 {
6572         int ret;
6573         ret = add_cache_extent(seen, bytenr, size);
6574         if (ret)
6575                 return ret;
6576         add_cache_extent(pending, bytenr, size);
6577         return 0;
6578 }
6579
6580 static int pick_next_pending(struct cache_tree *pending,
6581                         struct cache_tree *reada,
6582                         struct cache_tree *nodes,
6583                         u64 last, struct block_info *bits, int bits_nr,
6584                         int *reada_bits)
6585 {
6586         unsigned long node_start = last;
6587         struct cache_extent *cache;
6588         int ret;
6589
6590         cache = search_cache_extent(reada, 0);
6591         if (cache) {
6592                 bits[0].start = cache->start;
6593                 bits[0].size = cache->size;
6594                 *reada_bits = 1;
6595                 return 1;
6596         }
6597         *reada_bits = 0;
6598         if (node_start > 32768)
6599                 node_start -= 32768;
6600
6601         cache = search_cache_extent(nodes, node_start);
6602         if (!cache)
6603                 cache = search_cache_extent(nodes, 0);
6604
6605         if (!cache) {
6606                  cache = search_cache_extent(pending, 0);
6607                  if (!cache)
6608                          return 0;
6609                  ret = 0;
6610                  do {
6611                          bits[ret].start = cache->start;
6612                          bits[ret].size = cache->size;
6613                          cache = next_cache_extent(cache);
6614                          ret++;
6615                  } while (cache && ret < bits_nr);
6616                  return ret;
6617         }
6618
6619         ret = 0;
6620         do {
6621                 bits[ret].start = cache->start;
6622                 bits[ret].size = cache->size;
6623                 cache = next_cache_extent(cache);
6624                 ret++;
6625         } while (cache && ret < bits_nr);
6626
6627         if (bits_nr - ret > 8) {
6628                 u64 lookup = bits[0].start + bits[0].size;
6629                 struct cache_extent *next;
6630                 next = search_cache_extent(pending, lookup);
6631                 while(next) {
6632                         if (next->start - lookup > 32768)
6633                                 break;
6634                         bits[ret].start = next->start;
6635                         bits[ret].size = next->size;
6636                         lookup = next->start + next->size;
6637                         ret++;
6638                         if (ret == bits_nr)
6639                                 break;
6640                         next = next_cache_extent(next);
6641                         if (!next)
6642                                 break;
6643                 }
6644         }
6645         return ret;
6646 }
6647
6648 static void free_chunk_record(struct cache_extent *cache)
6649 {
6650         struct chunk_record *rec;
6651
6652         rec = container_of(cache, struct chunk_record, cache);
6653         list_del_init(&rec->list);
6654         list_del_init(&rec->dextents);
6655         free(rec);
6656 }
6657
6658 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6659 {
6660         cache_tree_free_extents(chunk_cache, free_chunk_record);
6661 }
6662
6663 static void free_device_record(struct rb_node *node)
6664 {
6665         struct device_record *rec;
6666
6667         rec = container_of(node, struct device_record, node);
6668         free(rec);
6669 }
6670
6671 FREE_RB_BASED_TREE(device_cache, free_device_record);
6672
6673 int insert_block_group_record(struct block_group_tree *tree,
6674                               struct block_group_record *bg_rec)
6675 {
6676         int ret;
6677
6678         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6679         if (ret)
6680                 return ret;
6681
6682         list_add_tail(&bg_rec->list, &tree->block_groups);
6683         return 0;
6684 }
6685
6686 static void free_block_group_record(struct cache_extent *cache)
6687 {
6688         struct block_group_record *rec;
6689
6690         rec = container_of(cache, struct block_group_record, cache);
6691         list_del_init(&rec->list);
6692         free(rec);
6693 }
6694
6695 void free_block_group_tree(struct block_group_tree *tree)
6696 {
6697         cache_tree_free_extents(&tree->tree, free_block_group_record);
6698 }
6699
6700 int insert_device_extent_record(struct device_extent_tree *tree,
6701                                 struct device_extent_record *de_rec)
6702 {
6703         int ret;
6704
6705         /*
6706          * Device extent is a bit different from the other extents, because
6707          * the extents which belong to the different devices may have the
6708          * same start and size, so we need use the special extent cache
6709          * search/insert functions.
6710          */
6711         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6712         if (ret)
6713                 return ret;
6714
6715         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6716         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6717         return 0;
6718 }
6719
6720 static void free_device_extent_record(struct cache_extent *cache)
6721 {
6722         struct device_extent_record *rec;
6723
6724         rec = container_of(cache, struct device_extent_record, cache);
6725         if (!list_empty(&rec->chunk_list))
6726                 list_del_init(&rec->chunk_list);
6727         if (!list_empty(&rec->device_list))
6728                 list_del_init(&rec->device_list);
6729         free(rec);
6730 }
6731
6732 void free_device_extent_tree(struct device_extent_tree *tree)
6733 {
6734         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6735 }
6736
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a tree
 * backref, anything else as a data backref carrying the item's ref count.
 * In both cases the key's objectid is the bytenr and its offset the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid, key.offset,
                                0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
6757
6758 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6759                                             struct btrfs_key *key,
6760                                             int slot)
6761 {
6762         struct btrfs_chunk *ptr;
6763         struct chunk_record *rec;
6764         int num_stripes, i;
6765
6766         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6767         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6768
6769         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6770         if (!rec) {
6771                 fprintf(stderr, "memory allocation failed\n");
6772                 exit(-1);
6773         }
6774
6775         INIT_LIST_HEAD(&rec->list);
6776         INIT_LIST_HEAD(&rec->dextents);
6777         rec->bg_rec = NULL;
6778
6779         rec->cache.start = key->offset;
6780         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6781
6782         rec->generation = btrfs_header_generation(leaf);
6783
6784         rec->objectid = key->objectid;
6785         rec->type = key->type;
6786         rec->offset = key->offset;
6787
6788         rec->length = rec->cache.size;
6789         rec->owner = btrfs_chunk_owner(leaf, ptr);
6790         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6791         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6792         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6793         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6794         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6795         rec->num_stripes = num_stripes;
6796         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6797
6798         for (i = 0; i < rec->num_stripes; ++i) {
6799                 rec->stripes[i].devid =
6800                         btrfs_stripe_devid_nr(leaf, ptr, i);
6801                 rec->stripes[i].offset =
6802                         btrfs_stripe_offset_nr(leaf, ptr, i);
6803                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6804                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6805                                 BTRFS_UUID_SIZE);
6806         }
6807
6808         return rec;
6809 }
6810
6811 static int process_chunk_item(struct cache_tree *chunk_cache,
6812                               struct btrfs_key *key, struct extent_buffer *eb,
6813                               int slot)
6814 {
6815         struct chunk_record *rec;
6816         struct btrfs_chunk *chunk;
6817         int ret = 0;
6818
6819         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6820         /*
6821          * Do extra check for this chunk item,
6822          *
6823          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6824          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6825          * and owner<->key_type check.
6826          */
6827         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6828                                       key->offset);
6829         if (ret < 0) {
6830                 error("chunk(%llu, %llu) is not valid, ignore it",
6831                       key->offset, btrfs_chunk_length(eb, chunk));
6832                 return 0;
6833         }
6834         rec = btrfs_new_chunk_record(eb, key, slot);
6835         ret = insert_cache_extent(chunk_cache, &rec->cache);
6836         if (ret) {
6837                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6838                         rec->offset, rec->length);
6839                 free(rec);
6840         }
6841
6842         return ret;
6843 }
6844
6845 static int process_device_item(struct rb_root *dev_cache,
6846                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6847 {
6848         struct btrfs_dev_item *ptr;
6849         struct device_record *rec;
6850         int ret = 0;
6851
6852         ptr = btrfs_item_ptr(eb,
6853                 slot, struct btrfs_dev_item);
6854
6855         rec = malloc(sizeof(*rec));
6856         if (!rec) {
6857                 fprintf(stderr, "memory allocation failed\n");
6858                 return -ENOMEM;
6859         }
6860
6861         rec->devid = key->offset;
6862         rec->generation = btrfs_header_generation(eb);
6863
6864         rec->objectid = key->objectid;
6865         rec->type = key->type;
6866         rec->offset = key->offset;
6867
6868         rec->devid = btrfs_device_id(eb, ptr);
6869         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6870         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6871
6872         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6873         if (ret) {
6874                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6875                 free(rec);
6876         }
6877
6878         return ret;
6879 }
6880
6881 struct block_group_record *
6882 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6883                              int slot)
6884 {
6885         struct btrfs_block_group_item *ptr;
6886         struct block_group_record *rec;
6887
6888         rec = calloc(1, sizeof(*rec));
6889         if (!rec) {
6890                 fprintf(stderr, "memory allocation failed\n");
6891                 exit(-1);
6892         }
6893
6894         rec->cache.start = key->objectid;
6895         rec->cache.size = key->offset;
6896
6897         rec->generation = btrfs_header_generation(leaf);
6898
6899         rec->objectid = key->objectid;
6900         rec->type = key->type;
6901         rec->offset = key->offset;
6902
6903         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6904         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6905
6906         INIT_LIST_HEAD(&rec->list);
6907
6908         return rec;
6909 }
6910
6911 static int process_block_group_item(struct block_group_tree *block_group_cache,
6912                                     struct btrfs_key *key,
6913                                     struct extent_buffer *eb, int slot)
6914 {
6915         struct block_group_record *rec;
6916         int ret = 0;
6917
6918         rec = btrfs_new_block_group_record(eb, key, slot);
6919         ret = insert_block_group_record(block_group_cache, rec);
6920         if (ret) {
6921                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6922                         rec->objectid, rec->offset);
6923                 free(rec);
6924         }
6925
6926         return ret;
6927 }
6928
6929 struct device_extent_record *
6930 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6931                                struct btrfs_key *key, int slot)
6932 {
6933         struct device_extent_record *rec;
6934         struct btrfs_dev_extent *ptr;
6935
6936         rec = calloc(1, sizeof(*rec));
6937         if (!rec) {
6938                 fprintf(stderr, "memory allocation failed\n");
6939                 exit(-1);
6940         }
6941
6942         rec->cache.objectid = key->objectid;
6943         rec->cache.start = key->offset;
6944
6945         rec->generation = btrfs_header_generation(leaf);
6946
6947         rec->objectid = key->objectid;
6948         rec->type = key->type;
6949         rec->offset = key->offset;
6950
6951         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6952         rec->chunk_objecteid =
6953                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6954         rec->chunk_offset =
6955                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6956         rec->length = btrfs_dev_extent_length(leaf, ptr);
6957         rec->cache.size = rec->length;
6958
6959         INIT_LIST_HEAD(&rec->chunk_list);
6960         INIT_LIST_HEAD(&rec->device_list);
6961
6962         return rec;
6963 }
6964
6965 static int
6966 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6967                            struct btrfs_key *key, struct extent_buffer *eb,
6968                            int slot)
6969 {
6970         struct device_extent_record *rec;
6971         int ret;
6972
6973         rec = btrfs_new_device_extent_record(eb, key, slot);
6974         ret = insert_device_extent_record(dev_extent_cache, rec);
6975         if (ret) {
6976                 fprintf(stderr,
6977                         "Device extent[%llu, %llu, %llu] existed.\n",
6978                         rec->objectid, rec->offset, rec->length);
6979                 free(rec);
6980         }
6981
6982         return ret;
6983 }
6984
6985 static int process_extent_item(struct btrfs_root *root,
6986                                struct cache_tree *extent_cache,
6987                                struct extent_buffer *eb, int slot)
6988 {
6989         struct btrfs_extent_item *ei;
6990         struct btrfs_extent_inline_ref *iref;
6991         struct btrfs_extent_data_ref *dref;
6992         struct btrfs_shared_data_ref *sref;
6993         struct btrfs_key key;
6994         struct extent_record tmpl;
6995         unsigned long end;
6996         unsigned long ptr;
6997         int ret;
6998         int type;
6999         u32 item_size = btrfs_item_size_nr(eb, slot);
7000         u64 refs = 0;
7001         u64 offset;
7002         u64 num_bytes;
7003         int metadata = 0;
7004
7005         btrfs_item_key_to_cpu(eb, &key, slot);
7006
7007         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7008                 metadata = 1;
7009                 num_bytes = root->fs_info->nodesize;
7010         } else {
7011                 num_bytes = key.offset;
7012         }
7013
7014         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7015                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7016                       key.objectid, root->fs_info->sectorsize);
7017                 return -EIO;
7018         }
7019         if (item_size < sizeof(*ei)) {
7020 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7021                 struct btrfs_extent_item_v0 *ei0;
7022                 BUG_ON(item_size != sizeof(*ei0));
7023                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7024                 refs = btrfs_extent_refs_v0(eb, ei0);
7025 #else
7026                 BUG();
7027 #endif
7028                 memset(&tmpl, 0, sizeof(tmpl));
7029                 tmpl.start = key.objectid;
7030                 tmpl.nr = num_bytes;
7031                 tmpl.extent_item_refs = refs;
7032                 tmpl.metadata = metadata;
7033                 tmpl.found_rec = 1;
7034                 tmpl.max_size = num_bytes;
7035
7036                 return add_extent_rec(extent_cache, &tmpl);
7037         }
7038
7039         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7040         refs = btrfs_extent_refs(eb, ei);
7041         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7042                 metadata = 1;
7043         else
7044                 metadata = 0;
7045         if (metadata && num_bytes != root->fs_info->nodesize) {
7046                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7047                       num_bytes, root->fs_info->nodesize);
7048                 return -EIO;
7049         }
7050         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7051                 error("ignore invalid data extent, length %llu is not aligned to %u",
7052                       num_bytes, root->fs_info->sectorsize);
7053                 return -EIO;
7054         }
7055
7056         memset(&tmpl, 0, sizeof(tmpl));
7057         tmpl.start = key.objectid;
7058         tmpl.nr = num_bytes;
7059         tmpl.extent_item_refs = refs;
7060         tmpl.metadata = metadata;
7061         tmpl.found_rec = 1;
7062         tmpl.max_size = num_bytes;
7063         add_extent_rec(extent_cache, &tmpl);
7064
7065         ptr = (unsigned long)(ei + 1);
7066         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7067             key.type == BTRFS_EXTENT_ITEM_KEY)
7068                 ptr += sizeof(struct btrfs_tree_block_info);
7069
7070         end = (unsigned long)ei + item_size;
7071         while (ptr < end) {
7072                 iref = (struct btrfs_extent_inline_ref *)ptr;
7073                 type = btrfs_extent_inline_ref_type(eb, iref);
7074                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7075                 switch (type) {
7076                 case BTRFS_TREE_BLOCK_REF_KEY:
7077                         ret = add_tree_backref(extent_cache, key.objectid,
7078                                         0, offset, 0);
7079                         if (ret < 0)
7080                                 error(
7081                         "add_tree_backref failed (extent items tree block): %s",
7082                                       strerror(-ret));
7083                         break;
7084                 case BTRFS_SHARED_BLOCK_REF_KEY:
7085                         ret = add_tree_backref(extent_cache, key.objectid,
7086                                         offset, 0, 0);
7087                         if (ret < 0)
7088                                 error(
7089                         "add_tree_backref failed (extent items shared block): %s",
7090                                       strerror(-ret));
7091                         break;
7092                 case BTRFS_EXTENT_DATA_REF_KEY:
7093                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7094                         add_data_backref(extent_cache, key.objectid, 0,
7095                                         btrfs_extent_data_ref_root(eb, dref),
7096                                         btrfs_extent_data_ref_objectid(eb,
7097                                                                        dref),
7098                                         btrfs_extent_data_ref_offset(eb, dref),
7099                                         btrfs_extent_data_ref_count(eb, dref),
7100                                         0, num_bytes);
7101                         break;
7102                 case BTRFS_SHARED_DATA_REF_KEY:
7103                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7104                         add_data_backref(extent_cache, key.objectid, offset,
7105                                         0, 0, 0,
7106                                         btrfs_shared_data_ref_count(eb, sref),
7107                                         0, num_bytes);
7108                         break;
7109                 default:
7110                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7111                                 key.objectid, key.type, num_bytes);
7112                         goto out;
7113                 }
7114                 ptr += btrfs_extent_inline_ref_size(type);
7115         }
7116         WARN_ON(ptr > end);
7117 out:
7118         return 0;
7119 }
7120
7121 static int check_cache_range(struct btrfs_root *root,
7122                              struct btrfs_block_group_cache *cache,
7123                              u64 offset, u64 bytes)
7124 {
7125         struct btrfs_free_space *entry;
7126         u64 *logical;
7127         u64 bytenr;
7128         int stripe_len;
7129         int i, nr, ret;
7130
7131         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7132                 bytenr = btrfs_sb_offset(i);
7133                 ret = btrfs_rmap_block(root->fs_info,
7134                                        cache->key.objectid, bytenr, 0,
7135                                        &logical, &nr, &stripe_len);
7136                 if (ret)
7137                         return ret;
7138
7139                 while (nr--) {
7140                         if (logical[nr] + stripe_len <= offset)
7141                                 continue;
7142                         if (offset + bytes <= logical[nr])
7143                                 continue;
7144                         if (logical[nr] == offset) {
7145                                 if (stripe_len >= bytes) {
7146                                         free(logical);
7147                                         return 0;
7148                                 }
7149                                 bytes -= stripe_len;
7150                                 offset += stripe_len;
7151                         } else if (logical[nr] < offset) {
7152                                 if (logical[nr] + stripe_len >=
7153                                     offset + bytes) {
7154                                         free(logical);
7155                                         return 0;
7156                                 }
7157                                 bytes = (offset + bytes) -
7158                                         (logical[nr] + stripe_len);
7159                                 offset = logical[nr] + stripe_len;
7160                         } else {
7161                                 /*
7162                                  * Could be tricky, the super may land in the
7163                                  * middle of the area we're checking.  First
7164                                  * check the easiest case, it's at the end.
7165                                  */
7166                                 if (logical[nr] + stripe_len >=
7167                                     bytes + offset) {
7168                                         bytes = logical[nr] - offset;
7169                                         continue;
7170                                 }
7171
7172                                 /* Check the left side */
7173                                 ret = check_cache_range(root, cache,
7174                                                         offset,
7175                                                         logical[nr] - offset);
7176                                 if (ret) {
7177                                         free(logical);
7178                                         return ret;
7179                                 }
7180
7181                                 /* Now we continue with the right side */
7182                                 bytes = (offset + bytes) -
7183                                         (logical[nr] + stripe_len);
7184                                 offset = logical[nr] + stripe_len;
7185                         }
7186                 }
7187
7188                 free(logical);
7189         }
7190
7191         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7192         if (!entry) {
7193                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7194                         offset, offset+bytes);
7195                 return -EINVAL;
7196         }
7197
7198         if (entry->offset != offset) {
7199                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7200                         entry->offset);
7201                 return -EINVAL;
7202         }
7203
7204         if (entry->bytes != bytes) {
7205                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7206                         bytes, entry->bytes, offset);
7207                 return -EINVAL;
7208         }
7209
7210         unlink_free_space(cache->free_space_ctl, entry);
7211         free(entry);
7212         return 0;
7213 }
7214
7215 static int verify_space_cache(struct btrfs_root *root,
7216                               struct btrfs_block_group_cache *cache)
7217 {
7218         struct btrfs_path path;
7219         struct extent_buffer *leaf;
7220         struct btrfs_key key;
7221         u64 last;
7222         int ret = 0;
7223
7224         root = root->fs_info->extent_root;
7225
7226         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7227
7228         btrfs_init_path(&path);
7229         key.objectid = last;
7230         key.offset = 0;
7231         key.type = BTRFS_EXTENT_ITEM_KEY;
7232         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7233         if (ret < 0)
7234                 goto out;
7235         ret = 0;
7236         while (1) {
7237                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7238                         ret = btrfs_next_leaf(root, &path);
7239                         if (ret < 0)
7240                                 goto out;
7241                         if (ret > 0) {
7242                                 ret = 0;
7243                                 break;
7244                         }
7245                 }
7246                 leaf = path.nodes[0];
7247                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7248                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7249                         break;
7250                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7251                     key.type != BTRFS_METADATA_ITEM_KEY) {
7252                         path.slots[0]++;
7253                         continue;
7254                 }
7255
7256                 if (last == key.objectid) {
7257                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7258                                 last = key.objectid + key.offset;
7259                         else
7260                                 last = key.objectid + root->fs_info->nodesize;
7261                         path.slots[0]++;
7262                         continue;
7263                 }
7264
7265                 ret = check_cache_range(root, cache, last,
7266                                         key.objectid - last);
7267                 if (ret)
7268                         break;
7269                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7270                         last = key.objectid + key.offset;
7271                 else
7272                         last = key.objectid + root->fs_info->nodesize;
7273                 path.slots[0]++;
7274         }
7275
7276         if (last < cache->key.objectid + cache->key.offset)
7277                 ret = check_cache_range(root, cache, last,
7278                                         cache->key.objectid +
7279                                         cache->key.offset - last);
7280
7281 out:
7282         btrfs_release_path(&path);
7283
7284         if (!ret &&
7285             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7286                 fprintf(stderr, "There are still entries left in the space "
7287                         "cache\n");
7288                 ret = -EINVAL;
7289         }
7290
7291         return ret;
7292 }
7293
7294 static int check_space_cache(struct btrfs_root *root)
7295 {
7296         struct btrfs_block_group_cache *cache;
7297         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7298         int ret;
7299         int error = 0;
7300
7301         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7302             btrfs_super_generation(root->fs_info->super_copy) !=
7303             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7304                 printf("cache and super generation don't match, space cache "
7305                        "will be invalidated\n");
7306                 return 0;
7307         }
7308
7309         if (ctx.progress_enabled) {
7310                 ctx.tp = TASK_FREE_SPACE;
7311                 task_start(ctx.info);
7312         }
7313
7314         while (1) {
7315                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7316                 if (!cache)
7317                         break;
7318
7319                 start = cache->key.objectid + cache->key.offset;
7320                 if (!cache->free_space_ctl) {
7321                         if (btrfs_init_free_space_ctl(cache,
7322                                                 root->fs_info->sectorsize)) {
7323                                 ret = -ENOMEM;
7324                                 break;
7325                         }
7326                 } else {
7327                         btrfs_remove_free_space_cache(cache);
7328                 }
7329
7330                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7331                         ret = exclude_super_stripes(root, cache);
7332                         if (ret) {
7333                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7334                                         strerror(-ret));
7335                                 error++;
7336                                 continue;
7337                         }
7338                         ret = load_free_space_tree(root->fs_info, cache);
7339                         free_excluded_extents(root, cache);
7340                         if (ret < 0) {
7341                                 fprintf(stderr, "could not load free space tree: %s\n",
7342                                         strerror(-ret));
7343                                 error++;
7344                                 continue;
7345                         }
7346                         error += ret;
7347                 } else {
7348                         ret = load_free_space_cache(root->fs_info, cache);
7349                         if (!ret)
7350                                 continue;
7351                 }
7352
7353                 ret = verify_space_cache(root, cache);
7354                 if (ret) {
7355                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7356                                 cache->key.objectid);
7357                         error++;
7358                 }
7359         }
7360
7361         task_stop(ctx.info);
7362
7363         return error ? -EINVAL : 0;
7364 }
7365
7366 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7367                         u64 num_bytes, unsigned long leaf_offset,
7368                         struct extent_buffer *eb) {
7369
7370         struct btrfs_fs_info *fs_info = root->fs_info;
7371         u64 offset = 0;
7372         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7373         char *data;
7374         unsigned long csum_offset;
7375         u32 csum;
7376         u32 csum_expected;
7377         u64 read_len;
7378         u64 data_checked = 0;
7379         u64 tmp;
7380         int ret = 0;
7381         int mirror;
7382         int num_copies;
7383
7384         if (num_bytes % fs_info->sectorsize)
7385                 return -EINVAL;
7386
7387         data = malloc(num_bytes);
7388         if (!data)
7389                 return -ENOMEM;
7390
7391         while (offset < num_bytes) {
7392                 mirror = 0;
7393 again:
7394                 read_len = num_bytes - offset;
7395                 /* read as much space once a time */
7396                 ret = read_extent_data(fs_info, data + offset,
7397                                 bytenr + offset, &read_len, mirror);
7398                 if (ret)
7399                         goto out;
7400                 data_checked = 0;
7401                 /* verify every 4k data's checksum */
7402                 while (data_checked < read_len) {
7403                         csum = ~(u32)0;
7404                         tmp = offset + data_checked;
7405
7406                         csum = btrfs_csum_data((char *)data + tmp,
7407                                                csum, fs_info->sectorsize);
7408                         btrfs_csum_final(csum, (u8 *)&csum);
7409
7410                         csum_offset = leaf_offset +
7411                                  tmp / fs_info->sectorsize * csum_size;
7412                         read_extent_buffer(eb, (char *)&csum_expected,
7413                                            csum_offset, csum_size);
7414                         /* try another mirror */
7415                         if (csum != csum_expected) {
7416                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7417                                                 mirror, bytenr + tmp,
7418                                                 csum, csum_expected);
7419                                 num_copies = btrfs_num_copies(root->fs_info,
7420                                                 bytenr, num_bytes);
7421                                 if (mirror < num_copies - 1) {
7422                                         mirror += 1;
7423                                         goto again;
7424                                 }
7425                         }
7426                         data_checked += fs_info->sectorsize;
7427                 }
7428                 offset += read_len;
7429         }
7430 out:
7431         free(data);
7432         return ret;
7433 }
7434
7435 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7436                                u64 num_bytes)
7437 {
7438         struct btrfs_path path;
7439         struct extent_buffer *leaf;
7440         struct btrfs_key key;
7441         int ret;
7442
7443         btrfs_init_path(&path);
7444         key.objectid = bytenr;
7445         key.type = BTRFS_EXTENT_ITEM_KEY;
7446         key.offset = (u64)-1;
7447
7448 again:
7449         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7450                                 0, 0);
7451         if (ret < 0) {
7452                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7453                 btrfs_release_path(&path);
7454                 return ret;
7455         } else if (ret) {
7456                 if (path.slots[0] > 0) {
7457                         path.slots[0]--;
7458                 } else {
7459                         ret = btrfs_prev_leaf(root, &path);
7460                         if (ret < 0) {
7461                                 goto out;
7462                         } else if (ret > 0) {
7463                                 ret = 0;
7464                                 goto out;
7465                         }
7466                 }
7467         }
7468
7469         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7470
7471         /*
7472          * Block group items come before extent items if they have the same
7473          * bytenr, so walk back one more just in case.  Dear future traveller,
7474          * first congrats on mastering time travel.  Now if it's not too much
7475          * trouble could you go back to 2006 and tell Chris to make the
7476          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7477          * EXTENT_ITEM_KEY please?
7478          */
7479         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7480                 if (path.slots[0] > 0) {
7481                         path.slots[0]--;
7482                 } else {
7483                         ret = btrfs_prev_leaf(root, &path);
7484                         if (ret < 0) {
7485                                 goto out;
7486                         } else if (ret > 0) {
7487                                 ret = 0;
7488                                 goto out;
7489                         }
7490                 }
7491                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7492         }
7493
7494         while (num_bytes) {
7495                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7496                         ret = btrfs_next_leaf(root, &path);
7497                         if (ret < 0) {
7498                                 fprintf(stderr, "Error going to next leaf "
7499                                         "%d\n", ret);
7500                                 btrfs_release_path(&path);
7501                                 return ret;
7502                         } else if (ret) {
7503                                 break;
7504                         }
7505                 }
7506                 leaf = path.nodes[0];
7507                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7508                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7509                         path.slots[0]++;
7510                         continue;
7511                 }
7512                 if (key.objectid + key.offset < bytenr) {
7513                         path.slots[0]++;
7514                         continue;
7515                 }
7516                 if (key.objectid > bytenr + num_bytes)
7517                         break;
7518
7519                 if (key.objectid == bytenr) {
7520                         if (key.offset >= num_bytes) {
7521                                 num_bytes = 0;
7522                                 break;
7523                         }
7524                         num_bytes -= key.offset;
7525                         bytenr += key.offset;
7526                 } else if (key.objectid < bytenr) {
7527                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7528                                 num_bytes = 0;
7529                                 break;
7530                         }
7531                         num_bytes = (bytenr + num_bytes) -
7532                                 (key.objectid + key.offset);
7533                         bytenr = key.objectid + key.offset;
7534                 } else {
7535                         if (key.objectid + key.offset < bytenr + num_bytes) {
7536                                 u64 new_start = key.objectid + key.offset;
7537                                 u64 new_bytes = bytenr + num_bytes - new_start;
7538
7539                                 /*
7540                                  * Weird case, the extent is in the middle of
7541                                  * our range, we'll have to search one side
7542                                  * and then the other.  Not sure if this happens
7543                                  * in real life, but no harm in coding it up
7544                                  * anyway just in case.
7545                                  */
7546                                 btrfs_release_path(&path);
7547                                 ret = check_extent_exists(root, new_start,
7548                                                           new_bytes);
7549                                 if (ret) {
7550                                         fprintf(stderr, "Right section didn't "
7551                                                 "have a record\n");
7552                                         break;
7553                                 }
7554                                 num_bytes = key.objectid - bytenr;
7555                                 goto again;
7556                         }
7557                         num_bytes = key.objectid - bytenr;
7558                 }
7559                 path.slots[0]++;
7560         }
7561         ret = 0;
7562
7563 out:
7564         if (num_bytes && !ret) {
7565                 fprintf(stderr, "There are no extents for csum range "
7566                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7567                 ret = 1;
7568         }
7569
7570         btrfs_release_path(&path);
7571         return ret;
7572 }
7573
7574 static int check_csums(struct btrfs_root *root)
7575 {
7576         struct btrfs_path path;
7577         struct extent_buffer *leaf;
7578         struct btrfs_key key;
7579         u64 offset = 0, num_bytes = 0;
7580         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7581         int errors = 0;
7582         int ret;
7583         u64 data_len;
7584         unsigned long leaf_offset;
7585
7586         root = root->fs_info->csum_root;
7587         if (!extent_buffer_uptodate(root->node)) {
7588                 fprintf(stderr, "No valid csum tree found\n");
7589                 return -ENOENT;
7590         }
7591
7592         btrfs_init_path(&path);
7593         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7594         key.type = BTRFS_EXTENT_CSUM_KEY;
7595         key.offset = 0;
7596         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7597         if (ret < 0) {
7598                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7599                 btrfs_release_path(&path);
7600                 return ret;
7601         }
7602
7603         if (ret > 0 && path.slots[0])
7604                 path.slots[0]--;
7605         ret = 0;
7606
7607         while (1) {
7608                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7609                         ret = btrfs_next_leaf(root, &path);
7610                         if (ret < 0) {
7611                                 fprintf(stderr, "Error going to next leaf "
7612                                         "%d\n", ret);
7613                                 break;
7614                         }
7615                         if (ret)
7616                                 break;
7617                 }
7618                 leaf = path.nodes[0];
7619
7620                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7621                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7622                         path.slots[0]++;
7623                         continue;
7624                 }
7625
7626                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7627                               csum_size) * root->fs_info->sectorsize;
7628                 if (!check_data_csum)
7629                         goto skip_csum_check;
7630                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7631                 ret = check_extent_csums(root, key.offset, data_len,
7632                                          leaf_offset, leaf);
7633                 if (ret)
7634                         break;
7635 skip_csum_check:
7636                 if (!num_bytes) {
7637                         offset = key.offset;
7638                 } else if (key.offset != offset + num_bytes) {
7639                         ret = check_extent_exists(root, offset, num_bytes);
7640                         if (ret) {
7641                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7642                                         "there is no extent record\n",
7643                                         offset, offset+num_bytes);
7644                                 errors++;
7645                         }
7646                         offset = key.offset;
7647                         num_bytes = 0;
7648                 }
7649                 num_bytes += data_len;
7650                 path.slots[0]++;
7651         }
7652
7653         btrfs_release_path(&path);
7654         return errors;
7655 }
7656
7657 static int is_dropped_key(struct btrfs_key *key,
7658                           struct btrfs_key *drop_key) {
7659         if (key->objectid < drop_key->objectid)
7660                 return 1;
7661         else if (key->objectid == drop_key->objectid) {
7662                 if (key->type < drop_key->type)
7663                         return 1;
7664                 else if (key->type == drop_key->type) {
7665                         if (key->offset < drop_key->offset)
7666                                 return 1;
7667                 }
7668         }
7669         return 0;
7670 }
7671
7672 /*
7673  * Here are the rules for FULL_BACKREF.
7674  *
7675  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7676  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7677  *      FULL_BACKREF set.
7678  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7679  *    if it happened after the relocation occurred since we'll have dropped the
7680  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7681  *    have no real way to know for sure.
7682  *
7683  * We process the blocks one root at a time, and we start from the lowest root
7684  * objectid and go to the highest.  So we can just lookup the owner backref for
7685  * the record and if we don't find it then we know it doesn't exist and we have
7686  * a FULL BACKREF.
7687  *
7688  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7689  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7690  * be set or not and then we can check later once we've gathered all the refs.
7691  */
7692 static int calc_extent_flag(struct cache_tree *extent_cache,
7693                            struct extent_buffer *buf,
7694                            struct root_item_record *ri,
7695                            u64 *flags)
7696 {
7697         struct extent_record *rec;
7698         struct cache_extent *cache;
7699         struct tree_backref *tback;
7700         u64 owner = 0;
7701
7702         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7703         /* we have added this extent before */
7704         if (!cache)
7705                 return -ENOENT;
7706
7707         rec = container_of(cache, struct extent_record, cache);
7708
7709         /*
7710          * Except file/reloc tree, we can not have
7711          * FULL BACKREF MODE
7712          */
7713         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7714                 goto normal;
7715         /*
7716          * root node
7717          */
7718         if (buf->start == ri->bytenr)
7719                 goto normal;
7720
7721         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7722                 goto full_backref;
7723
7724         owner = btrfs_header_owner(buf);
7725         if (owner == ri->objectid)
7726                 goto normal;
7727
7728         tback = find_tree_backref(rec, 0, owner);
7729         if (!tback)
7730                 goto full_backref;
7731 normal:
7732         *flags = 0;
7733         if (rec->flag_block_full_backref != FLAG_UNSET &&
7734             rec->flag_block_full_backref != 0)
7735                 rec->bad_full_backref = 1;
7736         return 0;
7737 full_backref:
7738         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7739         if (rec->flag_block_full_backref != FLAG_UNSET &&
7740             rec->flag_block_full_backref != 1)
7741                 rec->bad_full_backref = 1;
7742         return 0;
7743 }
7744
7745 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7746 {
7747         fprintf(stderr, "Invalid key type(");
7748         print_key_type(stderr, 0, key_type);
7749         fprintf(stderr, ") found in root(");
7750         print_objectid(stderr, rootid, 0);
7751         fprintf(stderr, ")\n");
7752 }
7753
7754 /*
7755  * Check if the key is valid with its extent buffer.
7756  *
7757  * This is a early check in case invalid key exists in a extent buffer
7758  * This is not comprehensive yet, but should prevent wrong key/item passed
7759  * further
7760  */
7761 static int check_type_with_root(u64 rootid, u8 key_type)
7762 {
7763         switch (key_type) {
7764         /* Only valid in chunk tree */
7765         case BTRFS_DEV_ITEM_KEY:
7766         case BTRFS_CHUNK_ITEM_KEY:
7767                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7768                         goto err;
7769                 break;
7770         /* valid in csum and log tree */
7771         case BTRFS_CSUM_TREE_OBJECTID:
7772                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7773                       is_fstree(rootid)))
7774                         goto err;
7775                 break;
7776         case BTRFS_EXTENT_ITEM_KEY:
7777         case BTRFS_METADATA_ITEM_KEY:
7778         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7779                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7780                         goto err;
7781                 break;
7782         case BTRFS_ROOT_ITEM_KEY:
7783                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7784                         goto err;
7785                 break;
7786         case BTRFS_DEV_EXTENT_KEY:
7787                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7788                         goto err;
7789                 break;
7790         }
7791         return 0;
7792 err:
7793         report_mismatch_key_root(key_type, rootid);
7794         return -EINVAL;
7795 }
7796
7797 static int run_next_block(struct btrfs_root *root,
7798                           struct block_info *bits,
7799                           int bits_nr,
7800                           u64 *last,
7801                           struct cache_tree *pending,
7802                           struct cache_tree *seen,
7803                           struct cache_tree *reada,
7804                           struct cache_tree *nodes,
7805                           struct cache_tree *extent_cache,
7806                           struct cache_tree *chunk_cache,
7807                           struct rb_root *dev_cache,
7808                           struct block_group_tree *block_group_cache,
7809                           struct device_extent_tree *dev_extent_cache,
7810                           struct root_item_record *ri)
7811 {
7812         struct btrfs_fs_info *fs_info = root->fs_info;
7813         struct extent_buffer *buf;
7814         struct extent_record *rec = NULL;
7815         u64 bytenr;
7816         u32 size;
7817         u64 parent;
7818         u64 owner;
7819         u64 flags;
7820         u64 ptr;
7821         u64 gen = 0;
7822         int ret = 0;
7823         int i;
7824         int nritems;
7825         struct btrfs_key key;
7826         struct cache_extent *cache;
7827         int reada_bits;
7828
7829         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7830                                     bits_nr, &reada_bits);
7831         if (nritems == 0)
7832                 return 1;
7833
7834         if (!reada_bits) {
7835                 for(i = 0; i < nritems; i++) {
7836                         ret = add_cache_extent(reada, bits[i].start,
7837                                                bits[i].size);
7838                         if (ret == -EEXIST)
7839                                 continue;
7840
7841                         /* fixme, get the parent transid */
7842                         readahead_tree_block(fs_info, bits[i].start, 0);
7843                 }
7844         }
7845         *last = bits[0].start;
7846         bytenr = bits[0].start;
7847         size = bits[0].size;
7848
7849         cache = lookup_cache_extent(pending, bytenr, size);
7850         if (cache) {
7851                 remove_cache_extent(pending, cache);
7852                 free(cache);
7853         }
7854         cache = lookup_cache_extent(reada, bytenr, size);
7855         if (cache) {
7856                 remove_cache_extent(reada, cache);
7857                 free(cache);
7858         }
7859         cache = lookup_cache_extent(nodes, bytenr, size);
7860         if (cache) {
7861                 remove_cache_extent(nodes, cache);
7862                 free(cache);
7863         }
7864         cache = lookup_cache_extent(extent_cache, bytenr, size);
7865         if (cache) {
7866                 rec = container_of(cache, struct extent_record, cache);
7867                 gen = rec->parent_generation;
7868         }
7869
7870         /* fixme, get the real parent transid */
7871         buf = read_tree_block(root->fs_info, bytenr, gen);
7872         if (!extent_buffer_uptodate(buf)) {
7873                 record_bad_block_io(root->fs_info,
7874                                     extent_cache, bytenr, size);
7875                 goto out;
7876         }
7877
7878         nritems = btrfs_header_nritems(buf);
7879
7880         flags = 0;
7881         if (!init_extent_tree) {
7882                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7883                                        btrfs_header_level(buf), 1, NULL,
7884                                        &flags);
7885                 if (ret < 0) {
7886                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7887                         if (ret < 0) {
7888                                 fprintf(stderr, "Couldn't calc extent flags\n");
7889                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7890                         }
7891                 }
7892         } else {
7893                 flags = 0;
7894                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7895                 if (ret < 0) {
7896                         fprintf(stderr, "Couldn't calc extent flags\n");
7897                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7898                 }
7899         }
7900
7901         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7902                 if (ri != NULL &&
7903                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7904                     ri->objectid == btrfs_header_owner(buf)) {
7905                         /*
7906                          * Ok we got to this block from it's original owner and
7907                          * we have FULL_BACKREF set.  Relocation can leave
7908                          * converted blocks over so this is altogether possible,
7909                          * however it's not possible if the generation > the
7910                          * last snapshot, so check for this case.
7911                          */
7912                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7913                             btrfs_header_generation(buf) > ri->last_snapshot) {
7914                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7915                                 rec->bad_full_backref = 1;
7916                         }
7917                 }
7918         } else {
7919                 if (ri != NULL &&
7920                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7921                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7922                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7923                         rec->bad_full_backref = 1;
7924                 }
7925         }
7926
7927         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7928                 rec->flag_block_full_backref = 1;
7929                 parent = bytenr;
7930                 owner = 0;
7931         } else {
7932                 rec->flag_block_full_backref = 0;
7933                 parent = 0;
7934                 owner = btrfs_header_owner(buf);
7935         }
7936
7937         ret = check_block(root, extent_cache, buf, flags);
7938         if (ret)
7939                 goto out;
7940
7941         if (btrfs_is_leaf(buf)) {
7942                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7943                 for (i = 0; i < nritems; i++) {
7944                         struct btrfs_file_extent_item *fi;
7945                         btrfs_item_key_to_cpu(buf, &key, i);
7946                         /*
7947                          * Check key type against the leaf owner.
7948                          * Could filter quite a lot of early error if
7949                          * owner is correct
7950                          */
7951                         if (check_type_with_root(btrfs_header_owner(buf),
7952                                                  key.type)) {
7953                                 fprintf(stderr, "ignoring invalid key\n");
7954                                 continue;
7955                         }
7956                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7957                                 process_extent_item(root, extent_cache, buf,
7958                                                     i);
7959                                 continue;
7960                         }
7961                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7962                                 process_extent_item(root, extent_cache, buf,
7963                                                     i);
7964                                 continue;
7965                         }
7966                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7967                                 total_csum_bytes +=
7968                                         btrfs_item_size_nr(buf, i);
7969                                 continue;
7970                         }
7971                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7972                                 process_chunk_item(chunk_cache, &key, buf, i);
7973                                 continue;
7974                         }
7975                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7976                                 process_device_item(dev_cache, &key, buf, i);
7977                                 continue;
7978                         }
7979                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7980                                 process_block_group_item(block_group_cache,
7981                                         &key, buf, i);
7982                                 continue;
7983                         }
7984                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7985                                 process_device_extent_item(dev_extent_cache,
7986                                         &key, buf, i);
7987                                 continue;
7988
7989                         }
7990                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7991 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7992                                 process_extent_ref_v0(extent_cache, buf, i);
7993 #else
7994                                 BUG();
7995 #endif
7996                                 continue;
7997                         }
7998
7999                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8000                                 ret = add_tree_backref(extent_cache,
8001                                                 key.objectid, 0, key.offset, 0);
8002                                 if (ret < 0)
8003                                         error(
8004                                 "add_tree_backref failed (leaf tree block): %s",
8005                                               strerror(-ret));
8006                                 continue;
8007                         }
8008                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8009                                 ret = add_tree_backref(extent_cache,
8010                                                 key.objectid, key.offset, 0, 0);
8011                                 if (ret < 0)
8012                                         error(
8013                                 "add_tree_backref failed (leaf shared block): %s",
8014                                               strerror(-ret));
8015                                 continue;
8016                         }
8017                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8018                                 struct btrfs_extent_data_ref *ref;
8019                                 ref = btrfs_item_ptr(buf, i,
8020                                                 struct btrfs_extent_data_ref);
8021                                 add_data_backref(extent_cache,
8022                                         key.objectid, 0,
8023                                         btrfs_extent_data_ref_root(buf, ref),
8024                                         btrfs_extent_data_ref_objectid(buf,
8025                                                                        ref),
8026                                         btrfs_extent_data_ref_offset(buf, ref),
8027                                         btrfs_extent_data_ref_count(buf, ref),
8028                                         0, root->fs_info->sectorsize);
8029                                 continue;
8030                         }
8031                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8032                                 struct btrfs_shared_data_ref *ref;
8033                                 ref = btrfs_item_ptr(buf, i,
8034                                                 struct btrfs_shared_data_ref);
8035                                 add_data_backref(extent_cache,
8036                                         key.objectid, key.offset, 0, 0, 0,
8037                                         btrfs_shared_data_ref_count(buf, ref),
8038                                         0, root->fs_info->sectorsize);
8039                                 continue;
8040                         }
8041                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8042                                 struct bad_item *bad;
8043
8044                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8045                                         continue;
8046                                 if (!owner)
8047                                         continue;
8048                                 bad = malloc(sizeof(struct bad_item));
8049                                 if (!bad)
8050                                         continue;
8051                                 INIT_LIST_HEAD(&bad->list);
8052                                 memcpy(&bad->key, &key,
8053                                        sizeof(struct btrfs_key));
8054                                 bad->root_id = owner;
8055                                 list_add_tail(&bad->list, &delete_items);
8056                                 continue;
8057                         }
8058                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8059                                 continue;
8060                         fi = btrfs_item_ptr(buf, i,
8061                                             struct btrfs_file_extent_item);
8062                         if (btrfs_file_extent_type(buf, fi) ==
8063                             BTRFS_FILE_EXTENT_INLINE)
8064                                 continue;
8065                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8066                                 continue;
8067
8068                         data_bytes_allocated +=
8069                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8070                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8071                                 abort();
8072                         }
8073                         data_bytes_referenced +=
8074                                 btrfs_file_extent_num_bytes(buf, fi);
8075                         add_data_backref(extent_cache,
8076                                 btrfs_file_extent_disk_bytenr(buf, fi),
8077                                 parent, owner, key.objectid, key.offset -
8078                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8079                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8080                 }
8081         } else {
8082                 int level;
8083                 struct btrfs_key first_key;
8084
8085                 first_key.objectid = 0;
8086
8087                 if (nritems > 0)
8088                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8089                 level = btrfs_header_level(buf);
8090                 for (i = 0; i < nritems; i++) {
8091                         struct extent_record tmpl;
8092
8093                         ptr = btrfs_node_blockptr(buf, i);
8094                         size = root->fs_info->nodesize;
8095                         btrfs_node_key_to_cpu(buf, &key, i);
8096                         if (ri != NULL) {
8097                                 if ((level == ri->drop_level)
8098                                     && is_dropped_key(&key, &ri->drop_key)) {
8099                                         continue;
8100                                 }
8101                         }
8102
8103                         memset(&tmpl, 0, sizeof(tmpl));
8104                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8105                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8106                         tmpl.start = ptr;
8107                         tmpl.nr = size;
8108                         tmpl.refs = 1;
8109                         tmpl.metadata = 1;
8110                         tmpl.max_size = size;
8111                         ret = add_extent_rec(extent_cache, &tmpl);
8112                         if (ret < 0)
8113                                 goto out;
8114
8115                         ret = add_tree_backref(extent_cache, ptr, parent,
8116                                         owner, 1);
8117                         if (ret < 0) {
8118                                 error(
8119                                 "add_tree_backref failed (non-leaf block): %s",
8120                                       strerror(-ret));
8121                                 continue;
8122                         }
8123
8124                         if (level > 1) {
8125                                 add_pending(nodes, seen, ptr, size);
8126                         } else {
8127                                 add_pending(pending, seen, ptr, size);
8128                         }
8129                 }
8130                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8131                                       nritems) * sizeof(struct btrfs_key_ptr);
8132         }
8133         total_btree_bytes += buf->len;
8134         if (fs_root_objectid(btrfs_header_owner(buf)))
8135                 total_fs_tree_bytes += buf->len;
8136         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8137                 total_extent_tree_bytes += buf->len;
8138 out:
8139         free_extent_buffer(buf);
8140         return ret;
8141 }
8142
8143 static int add_root_to_pending(struct extent_buffer *buf,
8144                                struct cache_tree *extent_cache,
8145                                struct cache_tree *pending,
8146                                struct cache_tree *seen,
8147                                struct cache_tree *nodes,
8148                                u64 objectid)
8149 {
8150         struct extent_record tmpl;
8151         int ret;
8152
8153         if (btrfs_header_level(buf) > 0)
8154                 add_pending(nodes, seen, buf->start, buf->len);
8155         else
8156                 add_pending(pending, seen, buf->start, buf->len);
8157
8158         memset(&tmpl, 0, sizeof(tmpl));
8159         tmpl.start = buf->start;
8160         tmpl.nr = buf->len;
8161         tmpl.is_root = 1;
8162         tmpl.refs = 1;
8163         tmpl.metadata = 1;
8164         tmpl.max_size = buf->len;
8165         add_extent_rec(extent_cache, &tmpl);
8166
8167         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8168             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8169                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8170                                 0, 1);
8171         else
8172                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8173                                 1);
8174         return ret;
8175 }
8176
8177 /* as we fix the tree, we might be deleting blocks that
8178  * we're tracking for repair.  This hook makes sure we
8179  * remove any backrefs for blocks as we are fixing them.
8180  */
8181 static int free_extent_hook(struct btrfs_trans_handle *trans,
8182                             struct btrfs_root *root,
8183                             u64 bytenr, u64 num_bytes, u64 parent,
8184                             u64 root_objectid, u64 owner, u64 offset,
8185                             int refs_to_drop)
8186 {
8187         struct extent_record *rec;
8188         struct cache_extent *cache;
8189         int is_data;
8190         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8191
8192         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8193         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8194         if (!cache)
8195                 return 0;
8196
8197         rec = container_of(cache, struct extent_record, cache);
8198         if (is_data) {
8199                 struct data_backref *back;
8200                 back = find_data_backref(rec, parent, root_objectid, owner,
8201                                          offset, 1, bytenr, num_bytes);
8202                 if (!back)
8203                         goto out;
8204                 if (back->node.found_ref) {
8205                         back->found_ref -= refs_to_drop;
8206                         if (rec->refs)
8207                                 rec->refs -= refs_to_drop;
8208                 }
8209                 if (back->node.found_extent_tree) {
8210                         back->num_refs -= refs_to_drop;
8211                         if (rec->extent_item_refs)
8212                                 rec->extent_item_refs -= refs_to_drop;
8213                 }
8214                 if (back->found_ref == 0)
8215                         back->node.found_ref = 0;
8216                 if (back->num_refs == 0)
8217                         back->node.found_extent_tree = 0;
8218
8219                 if (!back->node.found_extent_tree && back->node.found_ref) {
8220                         list_del(&back->node.list);
8221                         free(back);
8222                 }
8223         } else {
8224                 struct tree_backref *back;
8225                 back = find_tree_backref(rec, parent, root_objectid);
8226                 if (!back)
8227                         goto out;
8228                 if (back->node.found_ref) {
8229                         if (rec->refs)
8230                                 rec->refs--;
8231                         back->node.found_ref = 0;
8232                 }
8233                 if (back->node.found_extent_tree) {
8234                         if (rec->extent_item_refs)
8235                                 rec->extent_item_refs--;
8236                         back->node.found_extent_tree = 0;
8237                 }
8238                 if (!back->node.found_extent_tree && back->node.found_ref) {
8239                         list_del(&back->node.list);
8240                         free(back);
8241                 }
8242         }
8243         maybe_free_extent_rec(extent_cache, rec);
8244 out:
8245         return 0;
8246 }
8247
8248 static int delete_extent_records(struct btrfs_trans_handle *trans,
8249                                  struct btrfs_root *root,
8250                                  struct btrfs_path *path,
8251                                  u64 bytenr)
8252 {
8253         struct btrfs_key key;
8254         struct btrfs_key found_key;
8255         struct extent_buffer *leaf;
8256         int ret;
8257         int slot;
8258
8259
8260         key.objectid = bytenr;
8261         key.type = (u8)-1;
8262         key.offset = (u64)-1;
8263
8264         while(1) {
8265                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8266                                         &key, path, 0, 1);
8267                 if (ret < 0)
8268                         break;
8269
8270                 if (ret > 0) {
8271                         ret = 0;
8272                         if (path->slots[0] == 0)
8273                                 break;
8274                         path->slots[0]--;
8275                 }
8276                 ret = 0;
8277
8278                 leaf = path->nodes[0];
8279                 slot = path->slots[0];
8280
8281                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8282                 if (found_key.objectid != bytenr)
8283                         break;
8284
8285                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8286                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8287                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8288                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8289                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8290                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8291                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8292                         btrfs_release_path(path);
8293                         if (found_key.type == 0) {
8294                                 if (found_key.offset == 0)
8295                                         break;
8296                                 key.offset = found_key.offset - 1;
8297                                 key.type = found_key.type;
8298                         }
8299                         key.type = found_key.type - 1;
8300                         key.offset = (u64)-1;
8301                         continue;
8302                 }
8303
8304                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8305                         found_key.objectid, found_key.type, found_key.offset);
8306
8307                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8308                 if (ret)
8309                         break;
8310                 btrfs_release_path(path);
8311
8312                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8313                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8314                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8315                                 found_key.offset : root->fs_info->nodesize;
8316
8317                         ret = btrfs_update_block_group(trans, root, bytenr,
8318                                                        bytes, 0, 0);
8319                         if (ret)
8320                                 break;
8321                 }
8322         }
8323
8324         btrfs_release_path(path);
8325         return ret;
8326 }
8327
8328 /*
8329  * for a single backref, this will allocate a new extent
8330  * and add the backref to it.
8331  */
8332 static int record_extent(struct btrfs_trans_handle *trans,
8333                          struct btrfs_fs_info *info,
8334                          struct btrfs_path *path,
8335                          struct extent_record *rec,
8336                          struct extent_backref *back,
8337                          int allocated, u64 flags)
8338 {
8339         int ret = 0;
8340         struct btrfs_root *extent_root = info->extent_root;
8341         struct extent_buffer *leaf;
8342         struct btrfs_key ins_key;
8343         struct btrfs_extent_item *ei;
8344         struct data_backref *dback;
8345         struct btrfs_tree_block_info *bi;
8346
8347         if (!back->is_data)
8348                 rec->max_size = max_t(u64, rec->max_size,
8349                                     info->nodesize);
8350
8351         if (!allocated) {
8352                 u32 item_size = sizeof(*ei);
8353
8354                 if (!back->is_data)
8355                         item_size += sizeof(*bi);
8356
8357                 ins_key.objectid = rec->start;
8358                 ins_key.offset = rec->max_size;
8359                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8360
8361                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8362                                         &ins_key, item_size);
8363                 if (ret)
8364                         goto fail;
8365
8366                 leaf = path->nodes[0];
8367                 ei = btrfs_item_ptr(leaf, path->slots[0],
8368                                     struct btrfs_extent_item);
8369
8370                 btrfs_set_extent_refs(leaf, ei, 0);
8371                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8372
8373                 if (back->is_data) {
8374                         btrfs_set_extent_flags(leaf, ei,
8375                                                BTRFS_EXTENT_FLAG_DATA);
8376                 } else {
8377                         struct btrfs_disk_key copy_key;;
8378
8379                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8380                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8381                                              sizeof(*bi));
8382
8383                         btrfs_set_disk_key_objectid(&copy_key,
8384                                                     rec->info_objectid);
8385                         btrfs_set_disk_key_type(&copy_key, 0);
8386                         btrfs_set_disk_key_offset(&copy_key, 0);
8387
8388                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8389                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8390
8391                         btrfs_set_extent_flags(leaf, ei,
8392                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8393                 }
8394
8395                 btrfs_mark_buffer_dirty(leaf);
8396                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8397                                                rec->max_size, 1, 0);
8398                 if (ret)
8399                         goto fail;
8400                 btrfs_release_path(path);
8401         }
8402
8403         if (back->is_data) {
8404                 u64 parent;
8405                 int i;
8406
8407                 dback = to_data_backref(back);
8408                 if (back->full_backref)
8409                         parent = dback->parent;
8410                 else
8411                         parent = 0;
8412
8413                 for (i = 0; i < dback->found_ref; i++) {
8414                         /* if parent != 0, we're doing a full backref
8415                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8416                          * just makes the backref allocator create a data
8417                          * backref
8418                          */
8419                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8420                                                    rec->start, rec->max_size,
8421                                                    parent,
8422                                                    dback->root,
8423                                                    parent ?
8424                                                    BTRFS_FIRST_FREE_OBJECTID :
8425                                                    dback->owner,
8426                                                    dback->offset);
8427                         if (ret)
8428                                 break;
8429                 }
8430                 fprintf(stderr, "adding new data backref"
8431                                 " on %llu %s %llu owner %llu"
8432                                 " offset %llu found %d\n",
8433                                 (unsigned long long)rec->start,
8434                                 back->full_backref ?
8435                                 "parent" : "root",
8436                                 back->full_backref ?
8437                                 (unsigned long long)parent :
8438                                 (unsigned long long)dback->root,
8439                                 (unsigned long long)dback->owner,
8440                                 (unsigned long long)dback->offset,
8441                                 dback->found_ref);
8442         } else {
8443                 u64 parent;
8444                 struct tree_backref *tback;
8445
8446                 tback = to_tree_backref(back);
8447                 if (back->full_backref)
8448                         parent = tback->parent;
8449                 else
8450                         parent = 0;
8451
8452                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8453                                            rec->start, rec->max_size,
8454                                            parent, tback->root, 0, 0);
8455                 fprintf(stderr, "adding new tree backref on "
8456                         "start %llu len %llu parent %llu root %llu\n",
8457                         rec->start, rec->max_size, parent, tback->root);
8458         }
8459 fail:
8460         btrfs_release_path(path);
8461         return ret;
8462 }
8463
8464 static struct extent_entry *find_entry(struct list_head *entries,
8465                                        u64 bytenr, u64 bytes)
8466 {
8467         struct extent_entry *entry = NULL;
8468
8469         list_for_each_entry(entry, entries, list) {
8470                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8471                         return entry;
8472         }
8473
8474         return NULL;
8475 }
8476
8477 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8478 {
8479         struct extent_entry *entry, *best = NULL, *prev = NULL;
8480
8481         list_for_each_entry(entry, entries, list) {
8482                 /*
8483                  * If there are as many broken entries as entries then we know
8484                  * not to trust this particular entry.
8485                  */
8486                 if (entry->broken == entry->count)
8487                         continue;
8488
8489                 /*
8490                  * Special case, when there are only two entries and 'best' is
8491                  * the first one
8492                  */
8493                 if (!prev) {
8494                         best = entry;
8495                         prev = entry;
8496                         continue;
8497                 }
8498
8499                 /*
8500                  * If our current entry == best then we can't be sure our best
8501                  * is really the best, so we need to keep searching.
8502                  */
8503                 if (best && best->count == entry->count) {
8504                         prev = entry;
8505                         best = NULL;
8506                         continue;
8507                 }
8508
8509                 /* Prev == entry, not good enough, have to keep searching */
8510                 if (!prev->broken && prev->count == entry->count)
8511                         continue;
8512
8513                 if (!best)
8514                         best = (prev->count > entry->count) ? prev : entry;
8515                 else if (best->count < entry->count)
8516                         best = entry;
8517                 prev = entry;
8518         }
8519
8520         return best;
8521 }
8522
8523 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8524                       struct data_backref *dback, struct extent_entry *entry)
8525 {
8526         struct btrfs_trans_handle *trans;
8527         struct btrfs_root *root;
8528         struct btrfs_file_extent_item *fi;
8529         struct extent_buffer *leaf;
8530         struct btrfs_key key;
8531         u64 bytenr, bytes;
8532         int ret, err;
8533
8534         key.objectid = dback->root;
8535         key.type = BTRFS_ROOT_ITEM_KEY;
8536         key.offset = (u64)-1;
8537         root = btrfs_read_fs_root(info, &key);
8538         if (IS_ERR(root)) {
8539                 fprintf(stderr, "Couldn't find root for our ref\n");
8540                 return -EINVAL;
8541         }
8542
8543         /*
8544          * The backref points to the original offset of the extent if it was
8545          * split, so we need to search down to the offset we have and then walk
8546          * forward until we find the backref we're looking for.
8547          */
8548         key.objectid = dback->owner;
8549         key.type = BTRFS_EXTENT_DATA_KEY;
8550         key.offset = dback->offset;
8551         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8552         if (ret < 0) {
8553                 fprintf(stderr, "Error looking up ref %d\n", ret);
8554                 return ret;
8555         }
8556
8557         while (1) {
8558                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8559                         ret = btrfs_next_leaf(root, path);
8560                         if (ret) {
8561                                 fprintf(stderr, "Couldn't find our ref, next\n");
8562                                 return -EINVAL;
8563                         }
8564                 }
8565                 leaf = path->nodes[0];
8566                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8567                 if (key.objectid != dback->owner ||
8568                     key.type != BTRFS_EXTENT_DATA_KEY) {
8569                         fprintf(stderr, "Couldn't find our ref, search\n");
8570                         return -EINVAL;
8571                 }
8572                 fi = btrfs_item_ptr(leaf, path->slots[0],
8573                                     struct btrfs_file_extent_item);
8574                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8575                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8576
8577                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8578                         break;
8579                 path->slots[0]++;
8580         }
8581
8582         btrfs_release_path(path);
8583
8584         trans = btrfs_start_transaction(root, 1);
8585         if (IS_ERR(trans))
8586                 return PTR_ERR(trans);
8587
8588         /*
8589          * Ok we have the key of the file extent we want to fix, now we can cow
8590          * down to the thing and fix it.
8591          */
8592         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8593         if (ret < 0) {
8594                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8595                         key.objectid, key.type, key.offset, ret);
8596                 goto out;
8597         }
8598         if (ret > 0) {
8599                 fprintf(stderr, "Well that's odd, we just found this key "
8600                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8601                         key.offset);
8602                 ret = -EINVAL;
8603                 goto out;
8604         }
8605         leaf = path->nodes[0];
8606         fi = btrfs_item_ptr(leaf, path->slots[0],
8607                             struct btrfs_file_extent_item);
8608
8609         if (btrfs_file_extent_compression(leaf, fi) &&
8610             dback->disk_bytenr != entry->bytenr) {
8611                 fprintf(stderr, "Ref doesn't match the record start and is "
8612                         "compressed, please take a btrfs-image of this file "
8613                         "system and send it to a btrfs developer so they can "
8614                         "complete this functionality for bytenr %Lu\n",
8615                         dback->disk_bytenr);
8616                 ret = -EINVAL;
8617                 goto out;
8618         }
8619
8620         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8621                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8622         } else if (dback->disk_bytenr > entry->bytenr) {
8623                 u64 off_diff, offset;
8624
8625                 off_diff = dback->disk_bytenr - entry->bytenr;
8626                 offset = btrfs_file_extent_offset(leaf, fi);
8627                 if (dback->disk_bytenr + offset +
8628                     btrfs_file_extent_num_bytes(leaf, fi) >
8629                     entry->bytenr + entry->bytes) {
8630                         fprintf(stderr, "Ref is past the entry end, please "
8631                                 "take a btrfs-image of this file system and "
8632                                 "send it to a btrfs developer, ref %Lu\n",
8633                                 dback->disk_bytenr);
8634                         ret = -EINVAL;
8635                         goto out;
8636                 }
8637                 offset += off_diff;
8638                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8639                 btrfs_set_file_extent_offset(leaf, fi, offset);
8640         } else if (dback->disk_bytenr < entry->bytenr) {
8641                 u64 offset;
8642
8643                 offset = btrfs_file_extent_offset(leaf, fi);
8644                 if (dback->disk_bytenr + offset < entry->bytenr) {
8645                         fprintf(stderr, "Ref is before the entry start, please"
8646                                 " take a btrfs-image of this file system and "
8647                                 "send it to a btrfs developer, ref %Lu\n",
8648                                 dback->disk_bytenr);
8649                         ret = -EINVAL;
8650                         goto out;
8651                 }
8652
8653                 offset += dback->disk_bytenr;
8654                 offset -= entry->bytenr;
8655                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8656                 btrfs_set_file_extent_offset(leaf, fi, offset);
8657         }
8658
8659         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8660
8661         /*
8662          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8663          * only do this if we aren't using compression, otherwise it's a
8664          * trickier case.
8665          */
8666         if (!btrfs_file_extent_compression(leaf, fi))
8667                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8668         else
8669                 printf("ram bytes may be wrong?\n");
8670         btrfs_mark_buffer_dirty(leaf);
8671 out:
8672         err = btrfs_commit_transaction(trans, root);
8673         btrfs_release_path(path);
8674         return ret ? ret : err;
8675 }
8676
8677 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8678                            struct extent_record *rec)
8679 {
8680         struct extent_backref *back;
8681         struct data_backref *dback;
8682         struct extent_entry *entry, *best = NULL;
8683         LIST_HEAD(entries);
8684         int nr_entries = 0;
8685         int broken_entries = 0;
8686         int ret = 0;
8687         short mismatch = 0;
8688
8689         /*
8690          * Metadata is easy and the backrefs should always agree on bytenr and
8691          * size, if not we've got bigger issues.
8692          */
8693         if (rec->metadata)
8694                 return 0;
8695
8696         list_for_each_entry(back, &rec->backrefs, list) {
8697                 if (back->full_backref || !back->is_data)
8698                         continue;
8699
8700                 dback = to_data_backref(back);
8701
8702                 /*
8703                  * We only pay attention to backrefs that we found a real
8704                  * backref for.
8705                  */
8706                 if (dback->found_ref == 0)
8707                         continue;
8708
8709                 /*
8710                  * For now we only catch when the bytes don't match, not the
8711                  * bytenr.  We can easily do this at the same time, but I want
8712                  * to have a fs image to test on before we just add repair
8713                  * functionality willy-nilly so we know we won't screw up the
8714                  * repair.
8715                  */
8716
8717                 entry = find_entry(&entries, dback->disk_bytenr,
8718                                    dback->bytes);
8719                 if (!entry) {
8720                         entry = malloc(sizeof(struct extent_entry));
8721                         if (!entry) {
8722                                 ret = -ENOMEM;
8723                                 goto out;
8724                         }
8725                         memset(entry, 0, sizeof(*entry));
8726                         entry->bytenr = dback->disk_bytenr;
8727                         entry->bytes = dback->bytes;
8728                         list_add_tail(&entry->list, &entries);
8729                         nr_entries++;
8730                 }
8731
8732                 /*
8733                  * If we only have on entry we may think the entries agree when
8734                  * in reality they don't so we have to do some extra checking.
8735                  */
8736                 if (dback->disk_bytenr != rec->start ||
8737                     dback->bytes != rec->nr || back->broken)
8738                         mismatch = 1;
8739
8740                 if (back->broken) {
8741                         entry->broken++;
8742                         broken_entries++;
8743                 }
8744
8745                 entry->count++;
8746         }
8747
8748         /* Yay all the backrefs agree, carry on good sir */
8749         if (nr_entries <= 1 && !mismatch)
8750                 goto out;
8751
8752         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8753                 "%Lu\n", rec->start);
8754
8755         /*
8756          * First we want to see if the backrefs can agree amongst themselves who
8757          * is right, so figure out which one of the entries has the highest
8758          * count.
8759          */
8760         best = find_most_right_entry(&entries);
8761
8762         /*
8763          * Ok so we may have an even split between what the backrefs think, so
8764          * this is where we use the extent ref to see what it thinks.
8765          */
8766         if (!best) {
8767                 entry = find_entry(&entries, rec->start, rec->nr);
8768                 if (!entry && (!broken_entries || !rec->found_rec)) {
8769                         fprintf(stderr, "Backrefs don't agree with each other "
8770                                 "and extent record doesn't agree with anybody,"
8771                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8772                                 rec->start, rec->nr);
8773                         ret = -EINVAL;
8774                         goto out;
8775                 } else if (!entry) {
8776                         /*
8777                          * Ok our backrefs were broken, we'll assume this is the
8778                          * correct value and add an entry for this range.
8779                          */
8780                         entry = malloc(sizeof(struct extent_entry));
8781                         if (!entry) {
8782                                 ret = -ENOMEM;
8783                                 goto out;
8784                         }
8785                         memset(entry, 0, sizeof(*entry));
8786                         entry->bytenr = rec->start;
8787                         entry->bytes = rec->nr;
8788                         list_add_tail(&entry->list, &entries);
8789                         nr_entries++;
8790                 }
8791                 entry->count++;
8792                 best = find_most_right_entry(&entries);
8793                 if (!best) {
8794                         fprintf(stderr, "Backrefs and extent record evenly "
8795                                 "split on who is right, this is going to "
8796                                 "require user input to fix bytenr %Lu bytes "
8797                                 "%Lu\n", rec->start, rec->nr);
8798                         ret = -EINVAL;
8799                         goto out;
8800                 }
8801         }
8802
8803         /*
8804          * I don't think this can happen currently as we'll abort() if we catch
8805          * this case higher up, but in case somebody removes that we still can't
8806          * deal with it properly here yet, so just bail out of that's the case.
8807          */
8808         if (best->bytenr != rec->start) {
8809                 fprintf(stderr, "Extent start and backref starts don't match, "
8810                         "please use btrfs-image on this file system and send "
8811                         "it to a btrfs developer so they can make fsck fix "
8812                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8813                         rec->start, rec->nr);
8814                 ret = -EINVAL;
8815                 goto out;
8816         }
8817
8818         /*
8819          * Ok great we all agreed on an extent record, let's go find the real
8820          * references and fix up the ones that don't match.
8821          */
8822         list_for_each_entry(back, &rec->backrefs, list) {
8823                 if (back->full_backref || !back->is_data)
8824                         continue;
8825
8826                 dback = to_data_backref(back);
8827
8828                 /*
8829                  * Still ignoring backrefs that don't have a real ref attached
8830                  * to them.
8831                  */
8832                 if (dback->found_ref == 0)
8833                         continue;
8834
8835                 if (dback->bytes == best->bytes &&
8836                     dback->disk_bytenr == best->bytenr)
8837                         continue;
8838
8839                 ret = repair_ref(info, path, dback, best);
8840                 if (ret)
8841                         goto out;
8842         }
8843
8844         /*
8845          * Ok we messed with the actual refs, which means we need to drop our
8846          * entire cache and go back and rescan.  I know this is a huge pain and
8847          * adds a lot of extra work, but it's the only way to be safe.  Once all
8848          * the backrefs agree we may not need to do anything to the extent
8849          * record itself.
8850          */
8851         ret = -EAGAIN;
8852 out:
8853         while (!list_empty(&entries)) {
8854                 entry = list_entry(entries.next, struct extent_entry, list);
8855                 list_del_init(&entry->list);
8856                 free(entry);
8857         }
8858         return ret;
8859 }
8860
8861 static int process_duplicates(struct cache_tree *extent_cache,
8862                               struct extent_record *rec)
8863 {
8864         struct extent_record *good, *tmp;
8865         struct cache_extent *cache;
8866         int ret;
8867
8868         /*
8869          * If we found a extent record for this extent then return, or if we
8870          * have more than one duplicate we are likely going to need to delete
8871          * something.
8872          */
8873         if (rec->found_rec || rec->num_duplicates > 1)
8874                 return 0;
8875
8876         /* Shouldn't happen but just in case */
8877         BUG_ON(!rec->num_duplicates);
8878
8879         /*
8880          * So this happens if we end up with a backref that doesn't match the
8881          * actual extent entry.  So either the backref is bad or the extent
8882          * entry is bad.  Either way we want to have the extent_record actually
8883          * reflect what we found in the extent_tree, so we need to take the
8884          * duplicate out and use that as the extent_record since the only way we
8885          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8886          */
8887         remove_cache_extent(extent_cache, &rec->cache);
8888
8889         good = to_extent_record(rec->dups.next);
8890         list_del_init(&good->list);
8891         INIT_LIST_HEAD(&good->backrefs);
8892         INIT_LIST_HEAD(&good->dups);
8893         good->cache.start = good->start;
8894         good->cache.size = good->nr;
8895         good->content_checked = 0;
8896         good->owner_ref_checked = 0;
8897         good->num_duplicates = 0;
8898         good->refs = rec->refs;
8899         list_splice_init(&rec->backrefs, &good->backrefs);
8900         while (1) {
8901                 cache = lookup_cache_extent(extent_cache, good->start,
8902                                             good->nr);
8903                 if (!cache)
8904                         break;
8905                 tmp = container_of(cache, struct extent_record, cache);
8906
8907                 /*
8908                  * If we find another overlapping extent and it's found_rec is
8909                  * set then it's a duplicate and we need to try and delete
8910                  * something.
8911                  */
8912                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8913                         if (list_empty(&good->list))
8914                                 list_add_tail(&good->list,
8915                                               &duplicate_extents);
8916                         good->num_duplicates += tmp->num_duplicates + 1;
8917                         list_splice_init(&tmp->dups, &good->dups);
8918                         list_del_init(&tmp->list);
8919                         list_add_tail(&tmp->list, &good->dups);
8920                         remove_cache_extent(extent_cache, &tmp->cache);
8921                         continue;
8922                 }
8923
8924                 /*
8925                  * Ok we have another non extent item backed extent rec, so lets
8926                  * just add it to this extent and carry on like we did above.
8927                  */
8928                 good->refs += tmp->refs;
8929                 list_splice_init(&tmp->backrefs, &good->backrefs);
8930                 remove_cache_extent(extent_cache, &tmp->cache);
8931                 free(tmp);
8932         }
8933         ret = insert_cache_extent(extent_cache, &good->cache);
8934         BUG_ON(ret);
8935         free(rec);
8936         return good->num_duplicates ? 0 : 1;
8937 }
8938
8939 static int delete_duplicate_records(struct btrfs_root *root,
8940                                     struct extent_record *rec)
8941 {
8942         struct btrfs_trans_handle *trans;
8943         LIST_HEAD(delete_list);
8944         struct btrfs_path path;
8945         struct extent_record *tmp, *good, *n;
8946         int nr_del = 0;
8947         int ret = 0, err;
8948         struct btrfs_key key;
8949
8950         btrfs_init_path(&path);
8951
8952         good = rec;
8953         /* Find the record that covers all of the duplicates. */
8954         list_for_each_entry(tmp, &rec->dups, list) {
8955                 if (good->start < tmp->start)
8956                         continue;
8957                 if (good->nr > tmp->nr)
8958                         continue;
8959
8960                 if (tmp->start + tmp->nr < good->start + good->nr) {
8961                         fprintf(stderr, "Ok we have overlapping extents that "
8962                                 "aren't completely covered by each other, this "
8963                                 "is going to require more careful thought.  "
8964                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8965                                 tmp->start, tmp->nr, good->start, good->nr);
8966                         abort();
8967                 }
8968                 good = tmp;
8969         }
8970
8971         if (good != rec)
8972                 list_add_tail(&rec->list, &delete_list);
8973
8974         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8975                 if (tmp == good)
8976                         continue;
8977                 list_move_tail(&tmp->list, &delete_list);
8978         }
8979
8980         root = root->fs_info->extent_root;
8981         trans = btrfs_start_transaction(root, 1);
8982         if (IS_ERR(trans)) {
8983                 ret = PTR_ERR(trans);
8984                 goto out;
8985         }
8986
8987         list_for_each_entry(tmp, &delete_list, list) {
8988                 if (tmp->found_rec == 0)
8989                         continue;
8990                 key.objectid = tmp->start;
8991                 key.type = BTRFS_EXTENT_ITEM_KEY;
8992                 key.offset = tmp->nr;
8993
8994                 /* Shouldn't happen but just in case */
8995                 if (tmp->metadata) {
8996                         fprintf(stderr, "Well this shouldn't happen, extent "
8997                                 "record overlaps but is metadata? "
8998                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8999                         abort();
9000                 }
9001
9002                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9003                 if (ret) {
9004                         if (ret > 0)
9005                                 ret = -EINVAL;
9006                         break;
9007                 }
9008                 ret = btrfs_del_item(trans, root, &path);
9009                 if (ret)
9010                         break;
9011                 btrfs_release_path(&path);
9012                 nr_del++;
9013         }
9014         err = btrfs_commit_transaction(trans, root);
9015         if (err && !ret)
9016                 ret = err;
9017 out:
9018         while (!list_empty(&delete_list)) {
9019                 tmp = to_extent_record(delete_list.next);
9020                 list_del_init(&tmp->list);
9021                 if (tmp == rec)
9022                         continue;
9023                 free(tmp);
9024         }
9025
9026         while (!list_empty(&rec->dups)) {
9027                 tmp = to_extent_record(rec->dups.next);
9028                 list_del_init(&tmp->list);
9029                 free(tmp);
9030         }
9031
9032         btrfs_release_path(&path);
9033
9034         if (!ret && !nr_del)
9035                 rec->num_duplicates = 0;
9036
9037         return ret ? ret : nr_del;
9038 }
9039
9040 static int find_possible_backrefs(struct btrfs_fs_info *info,
9041                                   struct btrfs_path *path,
9042                                   struct cache_tree *extent_cache,
9043                                   struct extent_record *rec)
9044 {
9045         struct btrfs_root *root;
9046         struct extent_backref *back;
9047         struct data_backref *dback;
9048         struct cache_extent *cache;
9049         struct btrfs_file_extent_item *fi;
9050         struct btrfs_key key;
9051         u64 bytenr, bytes;
9052         int ret;
9053
9054         list_for_each_entry(back, &rec->backrefs, list) {
9055                 /* Don't care about full backrefs (poor unloved backrefs) */
9056                 if (back->full_backref || !back->is_data)
9057                         continue;
9058
9059                 dback = to_data_backref(back);
9060
9061                 /* We found this one, we don't need to do a lookup */
9062                 if (dback->found_ref)
9063                         continue;
9064
9065                 key.objectid = dback->root;
9066                 key.type = BTRFS_ROOT_ITEM_KEY;
9067                 key.offset = (u64)-1;
9068
9069                 root = btrfs_read_fs_root(info, &key);
9070
9071                 /* No root, definitely a bad ref, skip */
9072                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9073                         continue;
9074                 /* Other err, exit */
9075                 if (IS_ERR(root))
9076                         return PTR_ERR(root);
9077
9078                 key.objectid = dback->owner;
9079                 key.type = BTRFS_EXTENT_DATA_KEY;
9080                 key.offset = dback->offset;
9081                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9082                 if (ret) {
9083                         btrfs_release_path(path);
9084                         if (ret < 0)
9085                                 return ret;
9086                         /* Didn't find it, we can carry on */
9087                         ret = 0;
9088                         continue;
9089                 }
9090
9091                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9092                                     struct btrfs_file_extent_item);
9093                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9094                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9095                 btrfs_release_path(path);
9096                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9097                 if (cache) {
9098                         struct extent_record *tmp;
9099                         tmp = container_of(cache, struct extent_record, cache);
9100
9101                         /*
9102                          * If we found an extent record for the bytenr for this
9103                          * particular backref then we can't add it to our
9104                          * current extent record.  We only want to add backrefs
9105                          * that don't have a corresponding extent item in the
9106                          * extent tree since they likely belong to this record
9107                          * and we need to fix it if it doesn't match bytenrs.
9108                          */
9109                         if  (tmp->found_rec)
9110                                 continue;
9111                 }
9112
9113                 dback->found_ref += 1;
9114                 dback->disk_bytenr = bytenr;
9115                 dback->bytes = bytes;
9116
9117                 /*
9118                  * Set this so the verify backref code knows not to trust the
9119                  * values in this backref.
9120                  */
9121                 back->broken = 1;
9122         }
9123
9124         return 0;
9125 }
9126
9127 /*
9128  * Record orphan data ref into corresponding root.
9129  *
9130  * Return 0 if the extent item contains data ref and recorded.
9131  * Return 1 if the extent item contains no useful data ref
9132  *   On that case, it may contains only shared_dataref or metadata backref
9133  *   or the file extent exists(this should be handled by the extent bytenr
9134  *   recovery routine)
9135  * Return <0 if something goes wrong.
9136  */
9137 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9138                                       struct extent_record *rec)
9139 {
9140         struct btrfs_key key;
9141         struct btrfs_root *dest_root;
9142         struct extent_backref *back;
9143         struct data_backref *dback;
9144         struct orphan_data_extent *orphan;
9145         struct btrfs_path path;
9146         int recorded_data_ref = 0;
9147         int ret = 0;
9148
9149         if (rec->metadata)
9150                 return 1;
9151         btrfs_init_path(&path);
9152         list_for_each_entry(back, &rec->backrefs, list) {
9153                 if (back->full_backref || !back->is_data ||
9154                     !back->found_extent_tree)
9155                         continue;
9156                 dback = to_data_backref(back);
9157                 if (dback->found_ref)
9158                         continue;
9159                 key.objectid = dback->root;
9160                 key.type = BTRFS_ROOT_ITEM_KEY;
9161                 key.offset = (u64)-1;
9162
9163                 dest_root = btrfs_read_fs_root(fs_info, &key);
9164
9165                 /* For non-exist root we just skip it */
9166                 if (IS_ERR(dest_root) || !dest_root)
9167                         continue;
9168
9169                 key.objectid = dback->owner;
9170                 key.type = BTRFS_EXTENT_DATA_KEY;
9171                 key.offset = dback->offset;
9172
9173                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9174                 btrfs_release_path(&path);
9175                 /*
9176                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9177                  * we need to record it for inode/file extent rebuild.
9178                  * For ret > 0, we record it only for file extent rebuild.
9179                  * For ret == 0, the file extent exists but only bytenr
9180                  * mismatch, let the original bytenr fix routine to handle,
9181                  * don't record it.
9182                  */
9183                 if (ret == 0)
9184                         continue;
9185                 ret = 0;
9186                 orphan = malloc(sizeof(*orphan));
9187                 if (!orphan) {
9188                         ret = -ENOMEM;
9189                         goto out;
9190                 }
9191                 INIT_LIST_HEAD(&orphan->list);
9192                 orphan->root = dback->root;
9193                 orphan->objectid = dback->owner;
9194                 orphan->offset = dback->offset;
9195                 orphan->disk_bytenr = rec->cache.start;
9196                 orphan->disk_len = rec->cache.size;
9197                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9198                 recorded_data_ref = 1;
9199         }
9200 out:
9201         btrfs_release_path(&path);
9202         if (!ret)
9203                 return !recorded_data_ref;
9204         else
9205                 return ret;
9206 }
9207
9208 /*
9209  * when an incorrect extent item is found, this will delete
9210  * all of the existing entries for it and recreate them
9211  * based on what the tree scan found.
9212  */
9213 static int fixup_extent_refs(struct btrfs_fs_info *info,
9214                              struct cache_tree *extent_cache,
9215                              struct extent_record *rec)
9216 {
9217         struct btrfs_trans_handle *trans = NULL;
9218         int ret;
9219         struct btrfs_path path;
9220         struct list_head *cur = rec->backrefs.next;
9221         struct cache_extent *cache;
9222         struct extent_backref *back;
9223         int allocated = 0;
9224         u64 flags = 0;
9225
9226         if (rec->flag_block_full_backref)
9227                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9228
9229         btrfs_init_path(&path);
9230         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9231                 /*
9232                  * Sometimes the backrefs themselves are so broken they don't
9233                  * get attached to any meaningful rec, so first go back and
9234                  * check any of our backrefs that we couldn't find and throw
9235                  * them into the list if we find the backref so that
9236                  * verify_backrefs can figure out what to do.
9237                  */
9238                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9239                 if (ret < 0)
9240                         goto out;
9241         }
9242
9243         /* step one, make sure all of the backrefs agree */
9244         ret = verify_backrefs(info, &path, rec);
9245         if (ret < 0)
9246                 goto out;
9247
9248         trans = btrfs_start_transaction(info->extent_root, 1);
9249         if (IS_ERR(trans)) {
9250                 ret = PTR_ERR(trans);
9251                 goto out;
9252         }
9253
9254         /* step two, delete all the existing records */
9255         ret = delete_extent_records(trans, info->extent_root, &path,
9256                                     rec->start);
9257
9258         if (ret < 0)
9259                 goto out;
9260
9261         /* was this block corrupt?  If so, don't add references to it */
9262         cache = lookup_cache_extent(info->corrupt_blocks,
9263                                     rec->start, rec->max_size);
9264         if (cache) {
9265                 ret = 0;
9266                 goto out;
9267         }
9268
9269         /* step three, recreate all the refs we did find */
9270         while(cur != &rec->backrefs) {
9271                 back = to_extent_backref(cur);
9272                 cur = cur->next;
9273
9274                 /*
9275                  * if we didn't find any references, don't create a
9276                  * new extent record
9277                  */
9278                 if (!back->found_ref)
9279                         continue;
9280
9281                 rec->bad_full_backref = 0;
9282                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9283                 allocated = 1;
9284
9285                 if (ret)
9286                         goto out;
9287         }
9288 out:
9289         if (trans) {
9290                 int err = btrfs_commit_transaction(trans, info->extent_root);
9291                 if (!ret)
9292                         ret = err;
9293         }
9294
9295         if (!ret)
9296                 fprintf(stderr, "Repaired extent references for %llu\n",
9297                                 (unsigned long long)rec->start);
9298
9299         btrfs_release_path(&path);
9300         return ret;
9301 }
9302
9303 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9304                               struct extent_record *rec)
9305 {
9306         struct btrfs_trans_handle *trans;
9307         struct btrfs_root *root = fs_info->extent_root;
9308         struct btrfs_path path;
9309         struct btrfs_extent_item *ei;
9310         struct btrfs_key key;
9311         u64 flags;
9312         int ret = 0;
9313
9314         key.objectid = rec->start;
9315         if (rec->metadata) {
9316                 key.type = BTRFS_METADATA_ITEM_KEY;
9317                 key.offset = rec->info_level;
9318         } else {
9319                 key.type = BTRFS_EXTENT_ITEM_KEY;
9320                 key.offset = rec->max_size;
9321         }
9322
9323         trans = btrfs_start_transaction(root, 0);
9324         if (IS_ERR(trans))
9325                 return PTR_ERR(trans);
9326
9327         btrfs_init_path(&path);
9328         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9329         if (ret < 0) {
9330                 btrfs_release_path(&path);
9331                 btrfs_commit_transaction(trans, root);
9332                 return ret;
9333         } else if (ret) {
9334                 fprintf(stderr, "Didn't find extent for %llu\n",
9335                         (unsigned long long)rec->start);
9336                 btrfs_release_path(&path);
9337                 btrfs_commit_transaction(trans, root);
9338                 return -ENOENT;
9339         }
9340
9341         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9342                             struct btrfs_extent_item);
9343         flags = btrfs_extent_flags(path.nodes[0], ei);
9344         if (rec->flag_block_full_backref) {
9345                 fprintf(stderr, "setting full backref on %llu\n",
9346                         (unsigned long long)key.objectid);
9347                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9348         } else {
9349                 fprintf(stderr, "clearing full backref on %llu\n",
9350                         (unsigned long long)key.objectid);
9351                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9352         }
9353         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9354         btrfs_mark_buffer_dirty(path.nodes[0]);
9355         btrfs_release_path(&path);
9356         ret = btrfs_commit_transaction(trans, root);
9357         if (!ret)
9358                 fprintf(stderr, "Repaired extent flags for %llu\n",
9359                                 (unsigned long long)rec->start);
9360
9361         return ret;
9362 }
9363
9364 /* right now we only prune from the extent allocation tree */
9365 static int prune_one_block(struct btrfs_trans_handle *trans,
9366                            struct btrfs_fs_info *info,
9367                            struct btrfs_corrupt_block *corrupt)
9368 {
9369         int ret;
9370         struct btrfs_path path;
9371         struct extent_buffer *eb;
9372         u64 found;
9373         int slot;
9374         int nritems;
9375         int level = corrupt->level + 1;
9376
9377         btrfs_init_path(&path);
9378 again:
9379         /* we want to stop at the parent to our busted block */
9380         path.lowest_level = level;
9381
9382         ret = btrfs_search_slot(trans, info->extent_root,
9383                                 &corrupt->key, &path, -1, 1);
9384
9385         if (ret < 0)
9386                 goto out;
9387
9388         eb = path.nodes[level];
9389         if (!eb) {
9390                 ret = -ENOENT;
9391                 goto out;
9392         }
9393
9394         /*
9395          * hopefully the search gave us the block we want to prune,
9396          * lets try that first
9397          */
9398         slot = path.slots[level];
9399         found =  btrfs_node_blockptr(eb, slot);
9400         if (found == corrupt->cache.start)
9401                 goto del_ptr;
9402
9403         nritems = btrfs_header_nritems(eb);
9404
9405         /* the search failed, lets scan this node and hope we find it */
9406         for (slot = 0; slot < nritems; slot++) {
9407                 found =  btrfs_node_blockptr(eb, slot);
9408                 if (found == corrupt->cache.start)
9409                         goto del_ptr;
9410         }
9411         /*
9412          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9413          * to this block
9414          */
9415         if (eb == info->extent_root->node) {
9416                 ret = -ENOENT;
9417                 goto out;
9418         } else {
9419                 level++;
9420                 btrfs_release_path(&path);
9421                 goto again;
9422         }
9423
9424 del_ptr:
9425         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9426         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9427
9428 out:
9429         btrfs_release_path(&path);
9430         return ret;
9431 }
9432
9433 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9434 {
9435         struct btrfs_trans_handle *trans = NULL;
9436         struct cache_extent *cache;
9437         struct btrfs_corrupt_block *corrupt;
9438
9439         while (1) {
9440                 cache = search_cache_extent(info->corrupt_blocks, 0);
9441                 if (!cache)
9442                         break;
9443                 if (!trans) {
9444                         trans = btrfs_start_transaction(info->extent_root, 1);
9445                         if (IS_ERR(trans))
9446                                 return PTR_ERR(trans);
9447                 }
9448                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9449                 prune_one_block(trans, info, corrupt);
9450                 remove_cache_extent(info->corrupt_blocks, cache);
9451         }
9452         if (trans)
9453                 return btrfs_commit_transaction(trans, info->extent_root);
9454         return 0;
9455 }
9456
9457 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9458 {
9459         struct btrfs_block_group_cache *cache;
9460         u64 start, end;
9461         int ret;
9462
9463         while (1) {
9464                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9465                                             &start, &end, EXTENT_DIRTY);
9466                 if (ret)
9467                         break;
9468                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9469         }
9470
9471         start = 0;
9472         while (1) {
9473                 cache = btrfs_lookup_first_block_group(fs_info, start);
9474                 if (!cache)
9475                         break;
9476                 if (cache->cached)
9477                         cache->cached = 0;
9478                 start = cache->key.objectid + cache->key.offset;
9479         }
9480 }
9481
9482 static int check_extent_refs(struct btrfs_root *root,
9483                              struct cache_tree *extent_cache)
9484 {
9485         struct extent_record *rec;
9486         struct cache_extent *cache;
9487         int ret = 0;
9488         int had_dups = 0;
9489
9490         if (repair) {
9491                 /*
9492                  * if we're doing a repair, we have to make sure
9493                  * we don't allocate from the problem extents.
9494                  * In the worst case, this will be all the
9495                  * extents in the FS
9496                  */
9497                 cache = search_cache_extent(extent_cache, 0);
9498                 while(cache) {
9499                         rec = container_of(cache, struct extent_record, cache);
9500                         set_extent_dirty(root->fs_info->excluded_extents,
9501                                          rec->start,
9502                                          rec->start + rec->max_size - 1);
9503                         cache = next_cache_extent(cache);
9504                 }
9505
9506                 /* pin down all the corrupted blocks too */
9507                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9508                 while(cache) {
9509                         set_extent_dirty(root->fs_info->excluded_extents,
9510                                          cache->start,
9511                                          cache->start + cache->size - 1);
9512                         cache = next_cache_extent(cache);
9513                 }
9514                 prune_corrupt_blocks(root->fs_info);
9515                 reset_cached_block_groups(root->fs_info);
9516         }
9517
9518         reset_cached_block_groups(root->fs_info);
9519
9520         /*
9521          * We need to delete any duplicate entries we find first otherwise we
9522          * could mess up the extent tree when we have backrefs that actually
9523          * belong to a different extent item and not the weird duplicate one.
9524          */
9525         while (repair && !list_empty(&duplicate_extents)) {
9526                 rec = to_extent_record(duplicate_extents.next);
9527                 list_del_init(&rec->list);
9528
9529                 /* Sometimes we can find a backref before we find an actual
9530                  * extent, so we need to process it a little bit to see if there
9531                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9532                  * if this is a backref screwup.  If we need to delete stuff
9533                  * process_duplicates() will return 0, otherwise it will return
9534                  * 1 and we
9535                  */
9536                 if (process_duplicates(extent_cache, rec))
9537                         continue;
9538                 ret = delete_duplicate_records(root, rec);
9539                 if (ret < 0)
9540                         return ret;
9541                 /*
9542                  * delete_duplicate_records will return the number of entries
9543                  * deleted, so if it's greater than 0 then we know we actually
9544                  * did something and we need to remove.
9545                  */
9546                 if (ret)
9547                         had_dups = 1;
9548         }
9549
9550         if (had_dups)
9551                 return -EAGAIN;
9552
9553         while(1) {
9554                 int cur_err = 0;
9555                 int fix = 0;
9556
9557                 cache = search_cache_extent(extent_cache, 0);
9558                 if (!cache)
9559                         break;
9560                 rec = container_of(cache, struct extent_record, cache);
9561                 if (rec->num_duplicates) {
9562                         fprintf(stderr, "extent item %llu has multiple extent "
9563                                 "items\n", (unsigned long long)rec->start);
9564                         cur_err = 1;
9565                 }
9566
9567                 if (rec->refs != rec->extent_item_refs) {
9568                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9569                                 (unsigned long long)rec->start,
9570                                 (unsigned long long)rec->nr);
9571                         fprintf(stderr, "extent item %llu, found %llu\n",
9572                                 (unsigned long long)rec->extent_item_refs,
9573                                 (unsigned long long)rec->refs);
9574                         ret = record_orphan_data_extents(root->fs_info, rec);
9575                         if (ret < 0)
9576                                 goto repair_abort;
9577                         fix = ret;
9578                         cur_err = 1;
9579                 }
9580                 if (all_backpointers_checked(rec, 1)) {
9581                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9582                                 (unsigned long long)rec->start,
9583                                 (unsigned long long)rec->nr);
9584                         fix = 1;
9585                         cur_err = 1;
9586                 }
9587                 if (!rec->owner_ref_checked) {
9588                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9589                                 (unsigned long long)rec->start,
9590                                 (unsigned long long)rec->nr);
9591                         fix = 1;
9592                         cur_err = 1;
9593                 }
9594
9595                 if (repair && fix) {
9596                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9597                         if (ret)
9598                                 goto repair_abort;
9599                 }
9600
9601
9602                 if (rec->bad_full_backref) {
9603                         fprintf(stderr, "bad full backref, on [%llu]\n",
9604                                 (unsigned long long)rec->start);
9605                         if (repair) {
9606                                 ret = fixup_extent_flags(root->fs_info, rec);
9607                                 if (ret)
9608                                         goto repair_abort;
9609                                 fix = 1;
9610                         }
9611                         cur_err = 1;
9612                 }
9613                 /*
9614                  * Although it's not a extent ref's problem, we reuse this
9615                  * routine for error reporting.
9616                  * No repair function yet.
9617                  */
9618                 if (rec->crossing_stripes) {
9619                         fprintf(stderr,
9620                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9621                                 rec->start, rec->start + rec->max_size);
9622                         cur_err = 1;
9623                 }
9624
9625                 if (rec->wrong_chunk_type) {
9626                         fprintf(stderr,
9627                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9628                                 rec->start, rec->start + rec->max_size);
9629                         cur_err = 1;
9630                 }
9631
9632                 remove_cache_extent(extent_cache, cache);
9633                 free_all_extent_backrefs(rec);
9634                 if (!init_extent_tree && repair && (!cur_err || fix))
9635                         clear_extent_dirty(root->fs_info->excluded_extents,
9636                                            rec->start,
9637                                            rec->start + rec->max_size - 1);
9638                 free(rec);
9639         }
9640 repair_abort:
9641         if (repair) {
9642                 if (ret && ret != -EAGAIN) {
9643                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9644                         exit(1);
9645                 } else if (!ret) {
9646                         struct btrfs_trans_handle *trans;
9647
9648                         root = root->fs_info->extent_root;
9649                         trans = btrfs_start_transaction(root, 1);
9650                         if (IS_ERR(trans)) {
9651                                 ret = PTR_ERR(trans);
9652                                 goto repair_abort;
9653                         }
9654
9655                         ret = btrfs_fix_block_accounting(trans, root);
9656                         if (ret)
9657                                 goto repair_abort;
9658                         ret = btrfs_commit_transaction(trans, root);
9659                         if (ret)
9660                                 goto repair_abort;
9661                 }
9662                 return ret;
9663         }
9664         return 0;
9665 }
9666
9667 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9668 {
9669         u64 stripe_size;
9670
9671         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9672                 stripe_size = length;
9673                 stripe_size /= num_stripes;
9674         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9675                 stripe_size = length * 2;
9676                 stripe_size /= num_stripes;
9677         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9678                 stripe_size = length;
9679                 stripe_size /= (num_stripes - 1);
9680         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9681                 stripe_size = length;
9682                 stripe_size /= (num_stripes - 2);
9683         } else {
9684                 stripe_size = length;
9685         }
9686         return stripe_size;
9687 }
9688
9689 /*
9690  * Check the chunk with its block group/dev list ref:
9691  * Return 0 if all refs seems valid.
9692  * Return 1 if part of refs seems valid, need later check for rebuild ref
9693  * like missing block group and needs to search extent tree to rebuild them.
9694  * Return -1 if essential refs are missing and unable to rebuild.
9695  */
9696 static int check_chunk_refs(struct chunk_record *chunk_rec,
9697                             struct block_group_tree *block_group_cache,
9698                             struct device_extent_tree *dev_extent_cache,
9699                             int silent)
9700 {
9701         struct cache_extent *block_group_item;
9702         struct block_group_record *block_group_rec;
9703         struct cache_extent *dev_extent_item;
9704         struct device_extent_record *dev_extent_rec;
9705         u64 devid;
9706         u64 offset;
9707         u64 length;
9708         int metadump_v2 = 0;
9709         int i;
9710         int ret = 0;
9711
9712         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9713                                                chunk_rec->offset,
9714                                                chunk_rec->length);
9715         if (block_group_item) {
9716                 block_group_rec = container_of(block_group_item,
9717                                                struct block_group_record,
9718                                                cache);
9719                 if (chunk_rec->length != block_group_rec->offset ||
9720                     chunk_rec->offset != block_group_rec->objectid ||
9721                     (!metadump_v2 &&
9722                      chunk_rec->type_flags != block_group_rec->flags)) {
9723                         if (!silent)
9724                                 fprintf(stderr,
9725                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9726                                         chunk_rec->objectid,
9727                                         chunk_rec->type,
9728                                         chunk_rec->offset,
9729                                         chunk_rec->length,
9730                                         chunk_rec->offset,
9731                                         chunk_rec->type_flags,
9732                                         block_group_rec->objectid,
9733                                         block_group_rec->type,
9734                                         block_group_rec->offset,
9735                                         block_group_rec->offset,
9736                                         block_group_rec->objectid,
9737                                         block_group_rec->flags);
9738                         ret = -1;
9739                 } else {
9740                         list_del_init(&block_group_rec->list);
9741                         chunk_rec->bg_rec = block_group_rec;
9742                 }
9743         } else {
9744                 if (!silent)
9745                         fprintf(stderr,
9746                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9747                                 chunk_rec->objectid,
9748                                 chunk_rec->type,
9749                                 chunk_rec->offset,
9750                                 chunk_rec->length,
9751                                 chunk_rec->offset,
9752                                 chunk_rec->type_flags);
9753                 ret = 1;
9754         }
9755
9756         if (metadump_v2)
9757                 return ret;
9758
9759         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9760                                     chunk_rec->num_stripes);
9761         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9762                 devid = chunk_rec->stripes[i].devid;
9763                 offset = chunk_rec->stripes[i].offset;
9764                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9765                                                        devid, offset, length);
9766                 if (dev_extent_item) {
9767                         dev_extent_rec = container_of(dev_extent_item,
9768                                                 struct device_extent_record,
9769                                                 cache);
9770                         if (dev_extent_rec->objectid != devid ||
9771                             dev_extent_rec->offset != offset ||
9772                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9773                             dev_extent_rec->length != length) {
9774                                 if (!silent)
9775                                         fprintf(stderr,
9776                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9777                                                 chunk_rec->objectid,
9778                                                 chunk_rec->type,
9779                                                 chunk_rec->offset,
9780                                                 chunk_rec->stripes[i].devid,
9781                                                 chunk_rec->stripes[i].offset,
9782                                                 dev_extent_rec->objectid,
9783                                                 dev_extent_rec->offset,
9784                                                 dev_extent_rec->length);
9785                                 ret = -1;
9786                         } else {
9787                                 list_move(&dev_extent_rec->chunk_list,
9788                                           &chunk_rec->dextents);
9789                         }
9790                 } else {
9791                         if (!silent)
9792                                 fprintf(stderr,
9793                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9794                                         chunk_rec->objectid,
9795                                         chunk_rec->type,
9796                                         chunk_rec->offset,
9797                                         chunk_rec->stripes[i].devid,
9798                                         chunk_rec->stripes[i].offset);
9799                         ret = -1;
9800                 }
9801         }
9802         return ret;
9803 }
9804
9805 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9806 int check_chunks(struct cache_tree *chunk_cache,
9807                  struct block_group_tree *block_group_cache,
9808                  struct device_extent_tree *dev_extent_cache,
9809                  struct list_head *good, struct list_head *bad,
9810                  struct list_head *rebuild, int silent)
9811 {
9812         struct cache_extent *chunk_item;
9813         struct chunk_record *chunk_rec;
9814         struct block_group_record *bg_rec;
9815         struct device_extent_record *dext_rec;
9816         int err;
9817         int ret = 0;
9818
9819         chunk_item = first_cache_extent(chunk_cache);
9820         while (chunk_item) {
9821                 chunk_rec = container_of(chunk_item, struct chunk_record,
9822                                          cache);
9823                 err = check_chunk_refs(chunk_rec, block_group_cache,
9824                                        dev_extent_cache, silent);
9825                 if (err < 0)
9826                         ret = err;
9827                 if (err == 0 && good)
9828                         list_add_tail(&chunk_rec->list, good);
9829                 if (err > 0 && rebuild)
9830                         list_add_tail(&chunk_rec->list, rebuild);
9831                 if (err < 0 && bad)
9832                         list_add_tail(&chunk_rec->list, bad);
9833                 chunk_item = next_cache_extent(chunk_item);
9834         }
9835
9836         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9837                 if (!silent)
9838                         fprintf(stderr,
9839                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9840                                 bg_rec->objectid,
9841                                 bg_rec->offset,
9842                                 bg_rec->flags);
9843                 if (!ret)
9844                         ret = 1;
9845         }
9846
9847         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9848                             chunk_list) {
9849                 if (!silent)
9850                         fprintf(stderr,
9851                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9852                                 dext_rec->objectid,
9853                                 dext_rec->offset,
9854                                 dext_rec->length);
9855                 if (!ret)
9856                         ret = 1;
9857         }
9858         return ret;
9859 }
9860
9861
9862 static int check_device_used(struct device_record *dev_rec,
9863                              struct device_extent_tree *dext_cache)
9864 {
9865         struct cache_extent *cache;
9866         struct device_extent_record *dev_extent_rec;
9867         u64 total_byte = 0;
9868
9869         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9870         while (cache) {
9871                 dev_extent_rec = container_of(cache,
9872                                               struct device_extent_record,
9873                                               cache);
9874                 if (dev_extent_rec->objectid != dev_rec->devid)
9875                         break;
9876
9877                 list_del_init(&dev_extent_rec->device_list);
9878                 total_byte += dev_extent_rec->length;
9879                 cache = next_cache_extent(cache);
9880         }
9881
9882         if (total_byte != dev_rec->byte_used) {
9883                 fprintf(stderr,
9884                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9885                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9886                         dev_rec->type, dev_rec->offset);
9887                 return -1;
9888         } else {
9889                 return 0;
9890         }
9891 }
9892
9893 /* check btrfs_dev_item -> btrfs_dev_extent */
9894 static int check_devices(struct rb_root *dev_cache,
9895                          struct device_extent_tree *dev_extent_cache)
9896 {
9897         struct rb_node *dev_node;
9898         struct device_record *dev_rec;
9899         struct device_extent_record *dext_rec;
9900         int err;
9901         int ret = 0;
9902
9903         dev_node = rb_first(dev_cache);
9904         while (dev_node) {
9905                 dev_rec = container_of(dev_node, struct device_record, node);
9906                 err = check_device_used(dev_rec, dev_extent_cache);
9907                 if (err)
9908                         ret = err;
9909
9910                 dev_node = rb_next(dev_node);
9911         }
9912         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9913                             device_list) {
9914                 fprintf(stderr,
9915                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9916                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9917                 if (!ret)
9918                         ret = 1;
9919         }
9920         return ret;
9921 }
9922
9923 static int add_root_item_to_list(struct list_head *head,
9924                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9925                                   u8 level, u8 drop_level,
9926                                   struct btrfs_key *drop_key)
9927 {
9928
9929         struct root_item_record *ri_rec;
9930         ri_rec = malloc(sizeof(*ri_rec));
9931         if (!ri_rec)
9932                 return -ENOMEM;
9933         ri_rec->bytenr = bytenr;
9934         ri_rec->objectid = objectid;
9935         ri_rec->level = level;
9936         ri_rec->drop_level = drop_level;
9937         ri_rec->last_snapshot = last_snapshot;
9938         if (drop_key)
9939                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9940         list_add_tail(&ri_rec->list, head);
9941
9942         return 0;
9943 }
9944
9945 static void free_root_item_list(struct list_head *list)
9946 {
9947         struct root_item_record *ri_rec;
9948
9949         while (!list_empty(list)) {
9950                 ri_rec = list_first_entry(list, struct root_item_record,
9951                                           list);
9952                 list_del_init(&ri_rec->list);
9953                 free(ri_rec);
9954         }
9955 }
9956
9957 static int deal_root_from_list(struct list_head *list,
9958                                struct btrfs_root *root,
9959                                struct block_info *bits,
9960                                int bits_nr,
9961                                struct cache_tree *pending,
9962                                struct cache_tree *seen,
9963                                struct cache_tree *reada,
9964                                struct cache_tree *nodes,
9965                                struct cache_tree *extent_cache,
9966                                struct cache_tree *chunk_cache,
9967                                struct rb_root *dev_cache,
9968                                struct block_group_tree *block_group_cache,
9969                                struct device_extent_tree *dev_extent_cache)
9970 {
9971         int ret = 0;
9972         u64 last;
9973
9974         while (!list_empty(list)) {
9975                 struct root_item_record *rec;
9976                 struct extent_buffer *buf;
9977                 rec = list_entry(list->next,
9978                                  struct root_item_record, list);
9979                 last = 0;
9980                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
9981                 if (!extent_buffer_uptodate(buf)) {
9982                         free_extent_buffer(buf);
9983                         ret = -EIO;
9984                         break;
9985                 }
9986                 ret = add_root_to_pending(buf, extent_cache, pending,
9987                                     seen, nodes, rec->objectid);
9988                 if (ret < 0)
9989                         break;
9990                 /*
9991                  * To rebuild extent tree, we need deal with snapshot
9992                  * one by one, otherwise we deal with node firstly which
9993                  * can maximize readahead.
9994                  */
9995                 while (1) {
9996                         ret = run_next_block(root, bits, bits_nr, &last,
9997                                              pending, seen, reada, nodes,
9998                                              extent_cache, chunk_cache,
9999                                              dev_cache, block_group_cache,
10000                                              dev_extent_cache, rec);
10001                         if (ret != 0)
10002                                 break;
10003                 }
10004                 free_extent_buffer(buf);
10005                 list_del(&rec->list);
10006                 free(rec);
10007                 if (ret < 0)
10008                         break;
10009         }
10010         while (ret >= 0) {
10011                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10012                                      reada, nodes, extent_cache, chunk_cache,
10013                                      dev_cache, block_group_cache,
10014                                      dev_extent_cache, NULL);
10015                 if (ret != 0) {
10016                         if (ret > 0)
10017                                 ret = 0;
10018                         break;
10019                 }
10020         }
10021         return ret;
10022 }
10023
10024 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10025 {
10026         struct rb_root dev_cache;
10027         struct cache_tree chunk_cache;
10028         struct block_group_tree block_group_cache;
10029         struct device_extent_tree dev_extent_cache;
10030         struct cache_tree extent_cache;
10031         struct cache_tree seen;
10032         struct cache_tree pending;
10033         struct cache_tree reada;
10034         struct cache_tree nodes;
10035         struct extent_io_tree excluded_extents;
10036         struct cache_tree corrupt_blocks;
10037         struct btrfs_path path;
10038         struct btrfs_key key;
10039         struct btrfs_key found_key;
10040         int ret, err = 0;
10041         struct block_info *bits;
10042         int bits_nr;
10043         struct extent_buffer *leaf;
10044         int slot;
10045         struct btrfs_root_item ri;
10046         struct list_head dropping_trees;
10047         struct list_head normal_trees;
10048         struct btrfs_root *root1;
10049         struct btrfs_root *root;
10050         u64 objectid;
10051         u8 level;
10052
10053         root = fs_info->fs_root;
10054         dev_cache = RB_ROOT;
10055         cache_tree_init(&chunk_cache);
10056         block_group_tree_init(&block_group_cache);
10057         device_extent_tree_init(&dev_extent_cache);
10058
10059         cache_tree_init(&extent_cache);
10060         cache_tree_init(&seen);
10061         cache_tree_init(&pending);
10062         cache_tree_init(&nodes);
10063         cache_tree_init(&reada);
10064         cache_tree_init(&corrupt_blocks);
10065         extent_io_tree_init(&excluded_extents);
10066         INIT_LIST_HEAD(&dropping_trees);
10067         INIT_LIST_HEAD(&normal_trees);
10068
10069         if (repair) {
10070                 fs_info->excluded_extents = &excluded_extents;
10071                 fs_info->fsck_extent_cache = &extent_cache;
10072                 fs_info->free_extent_hook = free_extent_hook;
10073                 fs_info->corrupt_blocks = &corrupt_blocks;
10074         }
10075
10076         bits_nr = 1024;
10077         bits = malloc(bits_nr * sizeof(struct block_info));
10078         if (!bits) {
10079                 perror("malloc");
10080                 exit(1);
10081         }
10082
10083         if (ctx.progress_enabled) {
10084                 ctx.tp = TASK_EXTENTS;
10085                 task_start(ctx.info);
10086         }
10087
10088 again:
10089         root1 = fs_info->tree_root;
10090         level = btrfs_header_level(root1->node);
10091         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10092                                     root1->node->start, 0, level, 0, NULL);
10093         if (ret < 0)
10094                 goto out;
10095         root1 = fs_info->chunk_root;
10096         level = btrfs_header_level(root1->node);
10097         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10098                                     root1->node->start, 0, level, 0, NULL);
10099         if (ret < 0)
10100                 goto out;
10101         btrfs_init_path(&path);
10102         key.offset = 0;
10103         key.objectid = 0;
10104         key.type = BTRFS_ROOT_ITEM_KEY;
10105         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10106         if (ret < 0)
10107                 goto out;
10108         while(1) {
10109                 leaf = path.nodes[0];
10110                 slot = path.slots[0];
10111                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10112                         ret = btrfs_next_leaf(root, &path);
10113                         if (ret != 0)
10114                                 break;
10115                         leaf = path.nodes[0];
10116                         slot = path.slots[0];
10117                 }
10118                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10119                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10120                         unsigned long offset;
10121                         u64 last_snapshot;
10122
10123                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10124                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10125                         last_snapshot = btrfs_root_last_snapshot(&ri);
10126                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10127                                 level = btrfs_root_level(&ri);
10128                                 ret = add_root_item_to_list(&normal_trees,
10129                                                 found_key.objectid,
10130                                                 btrfs_root_bytenr(&ri),
10131                                                 last_snapshot, level,
10132                                                 0, NULL);
10133                                 if (ret < 0)
10134                                         goto out;
10135                         } else {
10136                                 level = btrfs_root_level(&ri);
10137                                 objectid = found_key.objectid;
10138                                 btrfs_disk_key_to_cpu(&found_key,
10139                                                       &ri.drop_progress);
10140                                 ret = add_root_item_to_list(&dropping_trees,
10141                                                 objectid,
10142                                                 btrfs_root_bytenr(&ri),
10143                                                 last_snapshot, level,
10144                                                 ri.drop_level, &found_key);
10145                                 if (ret < 0)
10146                                         goto out;
10147                         }
10148                 }
10149                 path.slots[0]++;
10150         }
10151         btrfs_release_path(&path);
10152
10153         /*
10154          * check_block can return -EAGAIN if it fixes something, please keep
10155          * this in mind when dealing with return values from these functions, if
10156          * we get -EAGAIN we want to fall through and restart the loop.
10157          */
10158         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10159                                   &seen, &reada, &nodes, &extent_cache,
10160                                   &chunk_cache, &dev_cache, &block_group_cache,
10161                                   &dev_extent_cache);
10162         if (ret < 0) {
10163                 if (ret == -EAGAIN)
10164                         goto loop;
10165                 goto out;
10166         }
10167         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10168                                   &pending, &seen, &reada, &nodes,
10169                                   &extent_cache, &chunk_cache, &dev_cache,
10170                                   &block_group_cache, &dev_extent_cache);
10171         if (ret < 0) {
10172                 if (ret == -EAGAIN)
10173                         goto loop;
10174                 goto out;
10175         }
10176
10177         ret = check_chunks(&chunk_cache, &block_group_cache,
10178                            &dev_extent_cache, NULL, NULL, NULL, 0);
10179         if (ret) {
10180                 if (ret == -EAGAIN)
10181                         goto loop;
10182                 err = ret;
10183         }
10184
10185         ret = check_extent_refs(root, &extent_cache);
10186         if (ret < 0) {
10187                 if (ret == -EAGAIN)
10188                         goto loop;
10189                 goto out;
10190         }
10191
10192         ret = check_devices(&dev_cache, &dev_extent_cache);
10193         if (ret && err)
10194                 ret = err;
10195
10196 out:
10197         task_stop(ctx.info);
10198         if (repair) {
10199                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10200                 extent_io_tree_cleanup(&excluded_extents);
10201                 fs_info->fsck_extent_cache = NULL;
10202                 fs_info->free_extent_hook = NULL;
10203                 fs_info->corrupt_blocks = NULL;
10204                 fs_info->excluded_extents = NULL;
10205         }
10206         free(bits);
10207         free_chunk_cache_tree(&chunk_cache);
10208         free_device_cache_tree(&dev_cache);
10209         free_block_group_tree(&block_group_cache);
10210         free_device_extent_tree(&dev_extent_cache);
10211         free_extent_cache_tree(&seen);
10212         free_extent_cache_tree(&pending);
10213         free_extent_cache_tree(&reada);
10214         free_extent_cache_tree(&nodes);
10215         free_root_item_list(&normal_trees);
10216         free_root_item_list(&dropping_trees);
10217         return ret;
10218 loop:
10219         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10220         free_extent_cache_tree(&seen);
10221         free_extent_cache_tree(&pending);
10222         free_extent_cache_tree(&reada);
10223         free_extent_cache_tree(&nodes);
10224         free_chunk_cache_tree(&chunk_cache);
10225         free_block_group_tree(&block_group_cache);
10226         free_device_cache_tree(&dev_cache);
10227         free_device_extent_tree(&dev_extent_cache);
10228         free_extent_record_cache(&extent_cache);
10229         free_root_item_list(&normal_trees);
10230         free_root_item_list(&dropping_trees);
10231         extent_io_tree_cleanup(&excluded_extents);
10232         goto again;
10233 }
10234
10235 /*
10236  * Check backrefs of a tree block given by @bytenr or @eb.
10237  *
10238  * @root:       the root containing the @bytenr or @eb
10239  * @eb:         tree block extent buffer, can be NULL
10240  * @bytenr:     bytenr of the tree block to search
10241  * @level:      tree level of the tree block
10242  * @owner:      owner of the tree block
10243  *
10244  * Return >0 for any error found and output error message
10245  * Return 0 for no error found
10246  */
10247 static int check_tree_block_ref(struct btrfs_root *root,
10248                                 struct extent_buffer *eb, u64 bytenr,
10249                                 int level, u64 owner)
10250 {
10251         struct btrfs_key key;
10252         struct btrfs_root *extent_root = root->fs_info->extent_root;
10253         struct btrfs_path path;
10254         struct btrfs_extent_item *ei;
10255         struct btrfs_extent_inline_ref *iref;
10256         struct extent_buffer *leaf;
10257         unsigned long end;
10258         unsigned long ptr;
10259         int slot;
10260         int skinny_level;
10261         int type;
10262         u32 nodesize = root->fs_info->nodesize;
10263         u32 item_size;
10264         u64 offset;
10265         int tree_reloc_root = 0;
10266         int found_ref = 0;
10267         int err = 0;
10268         int ret;
10269
10270         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10271             btrfs_header_bytenr(root->node) == bytenr)
10272                 tree_reloc_root = 1;
10273
10274         btrfs_init_path(&path);
10275         key.objectid = bytenr;
10276         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10277                 key.type = BTRFS_METADATA_ITEM_KEY;
10278         else
10279                 key.type = BTRFS_EXTENT_ITEM_KEY;
10280         key.offset = (u64)-1;
10281
10282         /* Search for the backref in extent tree */
10283         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10284         if (ret < 0) {
10285                 err |= BACKREF_MISSING;
10286                 goto out;
10287         }
10288         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10289         if (ret) {
10290                 err |= BACKREF_MISSING;
10291                 goto out;
10292         }
10293
10294         leaf = path.nodes[0];
10295         slot = path.slots[0];
10296         btrfs_item_key_to_cpu(leaf, &key, slot);
10297
10298         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10299
10300         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10301                 skinny_level = (int)key.offset;
10302                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10303         } else {
10304                 struct btrfs_tree_block_info *info;
10305
10306                 info = (struct btrfs_tree_block_info *)(ei + 1);
10307                 skinny_level = btrfs_tree_block_level(leaf, info);
10308                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10309         }
10310
10311         if (eb) {
10312                 u64 header_gen;
10313                 u64 extent_gen;
10314
10315                 if (!(btrfs_extent_flags(leaf, ei) &
10316                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10317                         error(
10318                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10319                                 key.objectid, nodesize,
10320                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10321                         err = BACKREF_MISMATCH;
10322                 }
10323                 header_gen = btrfs_header_generation(eb);
10324                 extent_gen = btrfs_extent_generation(leaf, ei);
10325                 if (header_gen != extent_gen) {
10326                         error(
10327         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10328                                 key.objectid, nodesize, header_gen,
10329                                 extent_gen);
10330                         err = BACKREF_MISMATCH;
10331                 }
10332                 if (level != skinny_level) {
10333                         error(
10334                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10335                                 key.objectid, nodesize, level, skinny_level);
10336                         err = BACKREF_MISMATCH;
10337                 }
10338                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10339                         error(
10340                         "extent[%llu %u] is referred by other roots than %llu",
10341                                 key.objectid, nodesize, root->objectid);
10342                         err = BACKREF_MISMATCH;
10343                 }
10344         }
10345
10346         /*
10347          * Iterate the extent/metadata item to find the exact backref
10348          */
10349         item_size = btrfs_item_size_nr(leaf, slot);
10350         ptr = (unsigned long)iref;
10351         end = (unsigned long)ei + item_size;
10352         while (ptr < end) {
10353                 iref = (struct btrfs_extent_inline_ref *)ptr;
10354                 type = btrfs_extent_inline_ref_type(leaf, iref);
10355                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10356
10357                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10358                         (offset == root->objectid || offset == owner)) {
10359                         found_ref = 1;
10360                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10361                         /*
10362                          * Backref of tree reloc root points to itself, no need
10363                          * to check backref any more.
10364                          */
10365                         if (tree_reloc_root)
10366                                 found_ref = 1;
10367                         else
10368                         /* Check if the backref points to valid referencer */
10369                                 found_ref = !check_tree_block_ref(root, NULL,
10370                                                 offset, level + 1, owner);
10371                 }
10372
10373                 if (found_ref)
10374                         break;
10375                 ptr += btrfs_extent_inline_ref_size(type);
10376         }
10377
10378         /*
10379          * Inlined extent item doesn't have what we need, check
10380          * TREE_BLOCK_REF_KEY
10381          */
10382         if (!found_ref) {
10383                 btrfs_release_path(&path);
10384                 key.objectid = bytenr;
10385                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10386                 key.offset = root->objectid;
10387
10388                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10389                 if (!ret)
10390                         found_ref = 1;
10391         }
10392         if (!found_ref)
10393                 err |= BACKREF_MISSING;
10394 out:
10395         btrfs_release_path(&path);
10396         if (eb && (err & BACKREF_MISSING))
10397                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10398                         bytenr, nodesize, owner, level);
10399         return err;
10400 }
10401
10402 /*
10403  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10404  *
10405  * Return >0 any error found and output error message
10406  * Return 0 for no error found
10407  */
10408 static int check_extent_data_item(struct btrfs_root *root,
10409                                   struct extent_buffer *eb, int slot)
10410 {
10411         struct btrfs_file_extent_item *fi;
10412         struct btrfs_path path;
10413         struct btrfs_root *extent_root = root->fs_info->extent_root;
10414         struct btrfs_key fi_key;
10415         struct btrfs_key dbref_key;
10416         struct extent_buffer *leaf;
10417         struct btrfs_extent_item *ei;
10418         struct btrfs_extent_inline_ref *iref;
10419         struct btrfs_extent_data_ref *dref;
10420         u64 owner;
10421         u64 disk_bytenr;
10422         u64 disk_num_bytes;
10423         u64 extent_num_bytes;
10424         u64 extent_flags;
10425         u32 item_size;
10426         unsigned long end;
10427         unsigned long ptr;
10428         int type;
10429         u64 ref_root;
10430         int found_dbackref = 0;
10431         int err = 0;
10432         int ret;
10433
10434         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10435         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10436
10437         /* Nothing to check for hole and inline data extents */
10438         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10439             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10440                 return 0;
10441
10442         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10443         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10444         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10445
10446         /* Check unaligned disk_num_bytes and num_bytes */
10447         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10448                 error(
10449 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10450                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10451                         root->fs_info->sectorsize);
10452                 err |= BYTES_UNALIGNED;
10453         } else {
10454                 data_bytes_allocated += disk_num_bytes;
10455         }
10456         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10457                 error(
10458 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10459                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10460                         root->fs_info->sectorsize);
10461                 err |= BYTES_UNALIGNED;
10462         } else {
10463                 data_bytes_referenced += extent_num_bytes;
10464         }
10465         owner = btrfs_header_owner(eb);
10466
10467         /* Check the extent item of the file extent in extent tree */
10468         btrfs_init_path(&path);
10469         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10470         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10471         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10472
10473         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10474         if (ret)
10475                 goto out;
10476
10477         leaf = path.nodes[0];
10478         slot = path.slots[0];
10479         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10480
10481         extent_flags = btrfs_extent_flags(leaf, ei);
10482
10483         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10484                 error(
10485                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10486                     disk_bytenr, disk_num_bytes,
10487                     BTRFS_EXTENT_FLAG_DATA);
10488                 err |= BACKREF_MISMATCH;
10489         }
10490
10491         /* Check data backref inside that extent item */
10492         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10493         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10494         ptr = (unsigned long)iref;
10495         end = (unsigned long)ei + item_size;
10496         while (ptr < end) {
10497                 iref = (struct btrfs_extent_inline_ref *)ptr;
10498                 type = btrfs_extent_inline_ref_type(leaf, iref);
10499                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10500
10501                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10502                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10503                         if (ref_root == owner || ref_root == root->objectid)
10504                                 found_dbackref = 1;
10505                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10506                         found_dbackref = !check_tree_block_ref(root, NULL,
10507                                 btrfs_extent_inline_ref_offset(leaf, iref),
10508                                 0, owner);
10509                 }
10510
10511                 if (found_dbackref)
10512                         break;
10513                 ptr += btrfs_extent_inline_ref_size(type);
10514         }
10515
10516         if (!found_dbackref) {
10517                 btrfs_release_path(&path);
10518
10519                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10520                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10521                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10522                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10523                                 fi_key.objectid, fi_key.offset);
10524
10525                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10526                                         &dbref_key, &path, 0, 0);
10527                 if (!ret) {
10528                         found_dbackref = 1;
10529                         goto out;
10530                 }
10531
10532                 btrfs_release_path(&path);
10533
10534                 /*
10535                  * Neither inlined nor EXTENT_DATA_REF found, try
10536                  * SHARED_DATA_REF as last chance.
10537                  */
10538                 dbref_key.objectid = disk_bytenr;
10539                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10540                 dbref_key.offset = eb->start;
10541
10542                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10543                                         &dbref_key, &path, 0, 0);
10544                 if (!ret) {
10545                         found_dbackref = 1;
10546                         goto out;
10547                 }
10548         }
10549
10550 out:
10551         if (!found_dbackref)
10552                 err |= BACKREF_MISSING;
10553         btrfs_release_path(&path);
10554         if (err & BACKREF_MISSING) {
10555                 error("data extent[%llu %llu] backref lost",
10556                       disk_bytenr, disk_num_bytes);
10557         }
10558         return err;
10559 }
10560
10561 /*
10562  * Get real tree block level for the case like shared block
10563  * Return >= 0 as tree level
10564  * Return <0 for error
10565  */
10566 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10567 {
10568         struct extent_buffer *eb;
10569         struct btrfs_path path;
10570         struct btrfs_key key;
10571         struct btrfs_extent_item *ei;
10572         u64 flags;
10573         u64 transid;
10574         u8 backref_level;
10575         u8 header_level;
10576         int ret;
10577
10578         /* Search extent tree for extent generation and level */
10579         key.objectid = bytenr;
10580         key.type = BTRFS_METADATA_ITEM_KEY;
10581         key.offset = (u64)-1;
10582
10583         btrfs_init_path(&path);
10584         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10585         if (ret < 0)
10586                 goto release_out;
10587         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10588         if (ret < 0)
10589                 goto release_out;
10590         if (ret > 0) {
10591                 ret = -ENOENT;
10592                 goto release_out;
10593         }
10594
10595         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10596         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10597                             struct btrfs_extent_item);
10598         flags = btrfs_extent_flags(path.nodes[0], ei);
10599         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10600                 ret = -ENOENT;
10601                 goto release_out;
10602         }
10603
10604         /* Get transid for later read_tree_block() check */
10605         transid = btrfs_extent_generation(path.nodes[0], ei);
10606
10607         /* Get backref level as one source */
10608         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10609                 backref_level = key.offset;
10610         } else {
10611                 struct btrfs_tree_block_info *info;
10612
10613                 info = (struct btrfs_tree_block_info *)(ei + 1);
10614                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10615         }
10616         btrfs_release_path(&path);
10617
10618         /* Get level from tree block as an alternative source */
10619         eb = read_tree_block(fs_info, bytenr, transid);
10620         if (!extent_buffer_uptodate(eb)) {
10621                 free_extent_buffer(eb);
10622                 return -EIO;
10623         }
10624         header_level = btrfs_header_level(eb);
10625         free_extent_buffer(eb);
10626
10627         if (header_level != backref_level)
10628                 return -EIO;
10629         return header_level;
10630
10631 release_out:
10632         btrfs_release_path(&path);
10633         return ret;
10634 }
10635
10636 /*
10637  * Check if a tree block backref is valid (points to a valid tree block)
10638  * if level == -1, level will be resolved
10639  * Return >0 for any error found and print error message
10640  */
10641 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10642                                     u64 bytenr, int level)
10643 {
10644         struct btrfs_root *root;
10645         struct btrfs_key key;
10646         struct btrfs_path path;
10647         struct extent_buffer *eb;
10648         struct extent_buffer *node;
10649         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10650         int err = 0;
10651         int ret;
10652
10653         /* Query level for level == -1 special case */
10654         if (level == -1)
10655                 level = query_tree_block_level(fs_info, bytenr);
10656         if (level < 0) {
10657                 err |= REFERENCER_MISSING;
10658                 goto out;
10659         }
10660
10661         key.objectid = root_id;
10662         key.type = BTRFS_ROOT_ITEM_KEY;
10663         key.offset = (u64)-1;
10664
10665         root = btrfs_read_fs_root(fs_info, &key);
10666         if (IS_ERR(root)) {
10667                 err |= REFERENCER_MISSING;
10668                 goto out;
10669         }
10670
10671         /* Read out the tree block to get item/node key */
10672         eb = read_tree_block(fs_info, bytenr, 0);
10673         if (!extent_buffer_uptodate(eb)) {
10674                 err |= REFERENCER_MISSING;
10675                 free_extent_buffer(eb);
10676                 goto out;
10677         }
10678
10679         /* Empty tree, no need to check key */
10680         if (!btrfs_header_nritems(eb) && !level) {
10681                 free_extent_buffer(eb);
10682                 goto out;
10683         }
10684
10685         if (level)
10686                 btrfs_node_key_to_cpu(eb, &key, 0);
10687         else
10688                 btrfs_item_key_to_cpu(eb, &key, 0);
10689
10690         free_extent_buffer(eb);
10691
10692         btrfs_init_path(&path);
10693         path.lowest_level = level;
10694         /* Search with the first key, to ensure we can reach it */
10695         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10696         if (ret < 0) {
10697                 err |= REFERENCER_MISSING;
10698                 goto release_out;
10699         }
10700
10701         node = path.nodes[level];
10702         if (btrfs_header_bytenr(node) != bytenr) {
10703                 error(
10704         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10705                         bytenr, nodesize, bytenr,
10706                         btrfs_header_bytenr(node));
10707                 err |= REFERENCER_MISMATCH;
10708         }
10709         if (btrfs_header_level(node) != level) {
10710                 error(
10711         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10712                         bytenr, nodesize, level,
10713                         btrfs_header_level(node));
10714                 err |= REFERENCER_MISMATCH;
10715         }
10716
10717 release_out:
10718         btrfs_release_path(&path);
10719 out:
10720         if (err & REFERENCER_MISSING) {
10721                 if (level < 0)
10722                         error("extent [%llu %d] lost referencer (owner: %llu)",
10723                                 bytenr, nodesize, root_id);
10724                 else
10725                         error(
10726                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10727                                 bytenr, nodesize, root_id, level);
10728         }
10729
10730         return err;
10731 }
10732
10733 /*
10734  * Check if tree block @eb is tree reloc root.
10735  * Return 0 if it's not or any problem happens
10736  * Return 1 if it's a tree reloc root
10737  */
10738 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10739                                  struct extent_buffer *eb)
10740 {
10741         struct btrfs_root *tree_reloc_root;
10742         struct btrfs_key key;
10743         u64 bytenr = btrfs_header_bytenr(eb);
10744         u64 owner = btrfs_header_owner(eb);
10745         int ret = 0;
10746
10747         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10748         key.offset = owner;
10749         key.type = BTRFS_ROOT_ITEM_KEY;
10750
10751         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10752         if (IS_ERR(tree_reloc_root))
10753                 return 0;
10754
10755         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10756                 ret = 1;
10757         btrfs_free_fs_root(tree_reloc_root);
10758         return ret;
10759 }
10760
10761 /*
10762  * Check referencer for shared block backref
10763  * If level == -1, this function will resolve the level.
10764  */
10765 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10766                                      u64 parent, u64 bytenr, int level)
10767 {
10768         struct extent_buffer *eb;
10769         u32 nr;
10770         int found_parent = 0;
10771         int i;
10772
10773         eb = read_tree_block(fs_info, parent, 0);
10774         if (!extent_buffer_uptodate(eb))
10775                 goto out;
10776
10777         if (level == -1)
10778                 level = query_tree_block_level(fs_info, bytenr);
10779         if (level < 0)
10780                 goto out;
10781
10782         /* It's possible it's a tree reloc root */
10783         if (parent == bytenr) {
10784                 if (is_tree_reloc_root(fs_info, eb))
10785                         found_parent = 1;
10786                 goto out;
10787         }
10788
10789         if (level + 1 != btrfs_header_level(eb))
10790                 goto out;
10791
10792         nr = btrfs_header_nritems(eb);
10793         for (i = 0; i < nr; i++) {
10794                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10795                         found_parent = 1;
10796                         break;
10797                 }
10798         }
10799 out:
10800         free_extent_buffer(eb);
10801         if (!found_parent) {
10802                 error(
10803         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10804                         bytenr, fs_info->nodesize, parent, level);
10805                 return REFERENCER_MISSING;
10806         }
10807         return 0;
10808 }
10809
10810 /*
10811  * Check referencer for normal (inlined) data ref
10812  * If len == 0, it will be resolved by searching in extent tree
10813  */
10814 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10815                                      u64 root_id, u64 objectid, u64 offset,
10816                                      u64 bytenr, u64 len, u32 count)
10817 {
10818         struct btrfs_root *root;
10819         struct btrfs_root *extent_root = fs_info->extent_root;
10820         struct btrfs_key key;
10821         struct btrfs_path path;
10822         struct extent_buffer *leaf;
10823         struct btrfs_file_extent_item *fi;
10824         u32 found_count = 0;
10825         int slot;
10826         int ret = 0;
10827
10828         if (!len) {
10829                 key.objectid = bytenr;
10830                 key.type = BTRFS_EXTENT_ITEM_KEY;
10831                 key.offset = (u64)-1;
10832
10833                 btrfs_init_path(&path);
10834                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10835                 if (ret < 0)
10836                         goto out;
10837                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10838                 if (ret)
10839                         goto out;
10840                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10841                 if (key.objectid != bytenr ||
10842                     key.type != BTRFS_EXTENT_ITEM_KEY)
10843                         goto out;
10844                 len = key.offset;
10845                 btrfs_release_path(&path);
10846         }
10847         key.objectid = root_id;
10848         key.type = BTRFS_ROOT_ITEM_KEY;
10849         key.offset = (u64)-1;
10850         btrfs_init_path(&path);
10851
10852         root = btrfs_read_fs_root(fs_info, &key);
10853         if (IS_ERR(root))
10854                 goto out;
10855
10856         key.objectid = objectid;
10857         key.type = BTRFS_EXTENT_DATA_KEY;
10858         /*
10859          * It can be nasty as data backref offset is
10860          * file offset - file extent offset, which is smaller or
10861          * equal to original backref offset.  The only special case is
10862          * overflow.  So we need to special check and do further search.
10863          */
10864         key.offset = offset & (1ULL << 63) ? 0 : offset;
10865
10866         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10867         if (ret < 0)
10868                 goto out;
10869
10870         /*
10871          * Search afterwards to get correct one
10872          * NOTE: As we must do a comprehensive check on the data backref to
10873          * make sure the dref count also matches, we must iterate all file
10874          * extents for that inode.
10875          */
10876         while (1) {
10877                 leaf = path.nodes[0];
10878                 slot = path.slots[0];
10879
10880                 if (slot >= btrfs_header_nritems(leaf))
10881                         goto next;
10882                 btrfs_item_key_to_cpu(leaf, &key, slot);
10883                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10884                         break;
10885                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10886                 /*
10887                  * Except normal disk bytenr and disk num bytes, we still
10888                  * need to do extra check on dbackref offset as
10889                  * dbackref offset = file_offset - file_extent_offset
10890                  */
10891                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10892                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10893                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10894                     offset)
10895                         found_count++;
10896
10897 next:
10898                 ret = btrfs_next_item(root, &path);
10899                 if (ret)
10900                         break;
10901         }
10902 out:
10903         btrfs_release_path(&path);
10904         if (found_count != count) {
10905                 error(
10906 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10907                         bytenr, len, root_id, objectid, offset, count, found_count);
10908                 return REFERENCER_MISSING;
10909         }
10910         return 0;
10911 }
10912
10913 /*
10914  * Check if the referencer of a shared data backref exists
10915  */
10916 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10917                                      u64 parent, u64 bytenr)
10918 {
10919         struct extent_buffer *eb;
10920         struct btrfs_key key;
10921         struct btrfs_file_extent_item *fi;
10922         u32 nr;
10923         int found_parent = 0;
10924         int i;
10925
10926         eb = read_tree_block(fs_info, parent, 0);
10927         if (!extent_buffer_uptodate(eb))
10928                 goto out;
10929
10930         nr = btrfs_header_nritems(eb);
10931         for (i = 0; i < nr; i++) {
10932                 btrfs_item_key_to_cpu(eb, &key, i);
10933                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10934                         continue;
10935
10936                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10937                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10938                         continue;
10939
10940                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10941                         found_parent = 1;
10942                         break;
10943                 }
10944         }
10945
10946 out:
10947         free_extent_buffer(eb);
10948         if (!found_parent) {
10949                 error("shared extent %llu referencer lost (parent: %llu)",
10950                         bytenr, parent);
10951                 return REFERENCER_MISSING;
10952         }
10953         return 0;
10954 }
10955
10956 /*
10957  * This function will check a given extent item, including its backref and
10958  * itself (like crossing stripe boundary and type)
10959  *
10960  * Since we don't use extent_record anymore, introduce new error bit
10961  */
10962 static int check_extent_item(struct btrfs_fs_info *fs_info,
10963                              struct extent_buffer *eb, int slot)
10964 {
10965         struct btrfs_extent_item *ei;
10966         struct btrfs_extent_inline_ref *iref;
10967         struct btrfs_extent_data_ref *dref;
10968         unsigned long end;
10969         unsigned long ptr;
10970         int type;
10971         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10972         u32 item_size = btrfs_item_size_nr(eb, slot);
10973         u64 flags;
10974         u64 offset;
10975         int metadata = 0;
10976         int level;
10977         struct btrfs_key key;
10978         int ret;
10979         int err = 0;
10980
10981         btrfs_item_key_to_cpu(eb, &key, slot);
10982         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10983                 bytes_used += key.offset;
10984         else
10985                 bytes_used += nodesize;
10986
10987         if (item_size < sizeof(*ei)) {
10988                 /*
10989                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10990                  * old thing when on disk format is still un-determined.
10991                  * No need to care about it anymore
10992                  */
10993                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10994                 return -ENOTTY;
10995         }
10996
10997         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10998         flags = btrfs_extent_flags(eb, ei);
10999
11000         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11001                 metadata = 1;
11002         if (metadata && check_crossing_stripes(global_info, key.objectid,
11003                                                eb->len)) {
11004                 error("bad metadata [%llu, %llu) crossing stripe boundary",
11005                       key.objectid, key.objectid + nodesize);
11006                 err |= CROSSING_STRIPE_BOUNDARY;
11007         }
11008
11009         ptr = (unsigned long)(ei + 1);
11010
11011         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11012                 /* Old EXTENT_ITEM metadata */
11013                 struct btrfs_tree_block_info *info;
11014
11015                 info = (struct btrfs_tree_block_info *)ptr;
11016                 level = btrfs_tree_block_level(eb, info);
11017                 ptr += sizeof(struct btrfs_tree_block_info);
11018         } else {
11019                 /* New METADATA_ITEM */
11020                 level = key.offset;
11021         }
11022         end = (unsigned long)ei + item_size;
11023
11024 next:
11025         /* Reached extent item end normally */
11026         if (ptr == end)
11027                 goto out;
11028
11029         /* Beyond extent item end, wrong item size */
11030         if (ptr > end) {
11031                 err |= ITEM_SIZE_MISMATCH;
11032                 error("extent item at bytenr %llu slot %d has wrong size",
11033                         eb->start, slot);
11034                 goto out;
11035         }
11036
11037         /* Now check every backref in this extent item */
11038         iref = (struct btrfs_extent_inline_ref *)ptr;
11039         type = btrfs_extent_inline_ref_type(eb, iref);
11040         offset = btrfs_extent_inline_ref_offset(eb, iref);
11041         switch (type) {
11042         case BTRFS_TREE_BLOCK_REF_KEY:
11043                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11044                                                level);
11045                 err |= ret;
11046                 break;
11047         case BTRFS_SHARED_BLOCK_REF_KEY:
11048                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11049                                                  level);
11050                 err |= ret;
11051                 break;
11052         case BTRFS_EXTENT_DATA_REF_KEY:
11053                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11054                 ret = check_extent_data_backref(fs_info,
11055                                 btrfs_extent_data_ref_root(eb, dref),
11056                                 btrfs_extent_data_ref_objectid(eb, dref),
11057                                 btrfs_extent_data_ref_offset(eb, dref),
11058                                 key.objectid, key.offset,
11059                                 btrfs_extent_data_ref_count(eb, dref));
11060                 err |= ret;
11061                 break;
11062         case BTRFS_SHARED_DATA_REF_KEY:
11063                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11064                 err |= ret;
11065                 break;
11066         default:
11067                 error("extent[%llu %d %llu] has unknown ref type: %d",
11068                         key.objectid, key.type, key.offset, type);
11069                 err |= UNKNOWN_TYPE;
11070                 goto out;
11071         }
11072
11073         ptr += btrfs_extent_inline_ref_size(type);
11074         goto next;
11075
11076 out:
11077         return err;
11078 }
11079
11080 /*
11081  * Check if a dev extent item is referred correctly by its chunk
11082  */
11083 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11084                                  struct extent_buffer *eb, int slot)
11085 {
11086         struct btrfs_root *chunk_root = fs_info->chunk_root;
11087         struct btrfs_dev_extent *ptr;
11088         struct btrfs_path path;
11089         struct btrfs_key chunk_key;
11090         struct btrfs_key devext_key;
11091         struct btrfs_chunk *chunk;
11092         struct extent_buffer *l;
11093         int num_stripes;
11094         u64 length;
11095         int i;
11096         int found_chunk = 0;
11097         int ret;
11098
11099         btrfs_item_key_to_cpu(eb, &devext_key, slot);
11100         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11101         length = btrfs_dev_extent_length(eb, ptr);
11102
11103         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11104         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11105         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11106
11107         btrfs_init_path(&path);
11108         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11109         if (ret)
11110                 goto out;
11111
11112         l = path.nodes[0];
11113         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11114         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11115                                       chunk_key.offset);
11116         if (ret < 0)
11117                 goto out;
11118
11119         if (btrfs_stripe_length(fs_info, l, chunk) != length)
11120                 goto out;
11121
11122         num_stripes = btrfs_chunk_num_stripes(l, chunk);
11123         for (i = 0; i < num_stripes; i++) {
11124                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11125                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11126
11127                 if (devid == devext_key.objectid &&
11128                     offset == devext_key.offset) {
11129                         found_chunk = 1;
11130                         break;
11131                 }
11132         }
11133 out:
11134         btrfs_release_path(&path);
11135         if (!found_chunk) {
11136                 error(
11137                 "device extent[%llu, %llu, %llu] did not find the related chunk",
11138                         devext_key.objectid, devext_key.offset, length);
11139                 return REFERENCER_MISSING;
11140         }
11141         return 0;
11142 }
11143
11144 /*
11145  * Check if the used space is correct with the dev item
11146  */
11147 static int check_dev_item(struct btrfs_fs_info *fs_info,
11148                           struct extent_buffer *eb, int slot)
11149 {
11150         struct btrfs_root *dev_root = fs_info->dev_root;
11151         struct btrfs_dev_item *dev_item;
11152         struct btrfs_path path;
11153         struct btrfs_key key;
11154         struct btrfs_dev_extent *ptr;
11155         u64 dev_id;
11156         u64 used;
11157         u64 total = 0;
11158         int ret;
11159
11160         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11161         dev_id = btrfs_device_id(eb, dev_item);
11162         used = btrfs_device_bytes_used(eb, dev_item);
11163
11164         key.objectid = dev_id;
11165         key.type = BTRFS_DEV_EXTENT_KEY;
11166         key.offset = 0;
11167
11168         btrfs_init_path(&path);
11169         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11170         if (ret < 0) {
11171                 btrfs_item_key_to_cpu(eb, &key, slot);
11172                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11173                         key.objectid, key.type, key.offset);
11174                 btrfs_release_path(&path);
11175                 return REFERENCER_MISSING;
11176         }
11177
11178         /* Iterate dev_extents to calculate the used space of a device */
11179         while (1) {
11180                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11181                         goto next;
11182
11183                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11184                 if (key.objectid > dev_id)
11185                         break;
11186                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11187                         goto next;
11188
11189                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11190                                      struct btrfs_dev_extent);
11191                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11192 next:
11193                 ret = btrfs_next_item(dev_root, &path);
11194                 if (ret)
11195                         break;
11196         }
11197         btrfs_release_path(&path);
11198
11199         if (used != total) {
11200                 btrfs_item_key_to_cpu(eb, &key, slot);
11201                 error(
11202 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11203                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11204                         BTRFS_DEV_EXTENT_KEY, dev_id);
11205                 return ACCOUNTING_MISMATCH;
11206         }
11207         return 0;
11208 }
11209
11210 /*
11211  * Check a block group item with its referencer (chunk) and its used space
11212  * with extent/metadata item
11213  */
11214 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11215                                   struct extent_buffer *eb, int slot)
11216 {
11217         struct btrfs_root *extent_root = fs_info->extent_root;
11218         struct btrfs_root *chunk_root = fs_info->chunk_root;
11219         struct btrfs_block_group_item *bi;
11220         struct btrfs_block_group_item bg_item;
11221         struct btrfs_path path;
11222         struct btrfs_key bg_key;
11223         struct btrfs_key chunk_key;
11224         struct btrfs_key extent_key;
11225         struct btrfs_chunk *chunk;
11226         struct extent_buffer *leaf;
11227         struct btrfs_extent_item *ei;
11228         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11229         u64 flags;
11230         u64 bg_flags;
11231         u64 used;
11232         u64 total = 0;
11233         int ret;
11234         int err = 0;
11235
11236         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11237         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11238         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11239         used = btrfs_block_group_used(&bg_item);
11240         bg_flags = btrfs_block_group_flags(&bg_item);
11241
11242         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11243         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11244         chunk_key.offset = bg_key.objectid;
11245
11246         btrfs_init_path(&path);
11247         /* Search for the referencer chunk */
11248         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11249         if (ret) {
11250                 error(
11251                 "block group[%llu %llu] did not find the related chunk item",
11252                         bg_key.objectid, bg_key.offset);
11253                 err |= REFERENCER_MISSING;
11254         } else {
11255                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11256                                         struct btrfs_chunk);
11257                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11258                                                 bg_key.offset) {
11259                         error(
11260         "block group[%llu %llu] related chunk item length does not match",
11261                                 bg_key.objectid, bg_key.offset);
11262                         err |= REFERENCER_MISMATCH;
11263                 }
11264         }
11265         btrfs_release_path(&path);
11266
11267         /* Search from the block group bytenr */
11268         extent_key.objectid = bg_key.objectid;
11269         extent_key.type = 0;
11270         extent_key.offset = 0;
11271
11272         btrfs_init_path(&path);
11273         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11274         if (ret < 0)
11275                 goto out;
11276
11277         /* Iterate extent tree to account used space */
11278         while (1) {
11279                 leaf = path.nodes[0];
11280
11281                 /* Search slot can point to the last item beyond leaf nritems */
11282                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11283                         goto next;
11284
11285                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11286                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11287                         break;
11288
11289                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11290                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11291                         goto next;
11292                 if (extent_key.objectid < bg_key.objectid)
11293                         goto next;
11294
11295                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11296                         total += nodesize;
11297                 else
11298                         total += extent_key.offset;
11299
11300                 ei = btrfs_item_ptr(leaf, path.slots[0],
11301                                     struct btrfs_extent_item);
11302                 flags = btrfs_extent_flags(leaf, ei);
11303                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11304                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11305                                 error(
11306                         "bad extent[%llu, %llu) type mismatch with chunk",
11307                                         extent_key.objectid,
11308                                         extent_key.objectid + extent_key.offset);
11309                                 err |= CHUNK_TYPE_MISMATCH;
11310                         }
11311                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11312                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11313                                     BTRFS_BLOCK_GROUP_METADATA))) {
11314                                 error(
11315                         "bad extent[%llu, %llu) type mismatch with chunk",
11316                                         extent_key.objectid,
11317                                         extent_key.objectid + nodesize);
11318                                 err |= CHUNK_TYPE_MISMATCH;
11319                         }
11320                 }
11321 next:
11322                 ret = btrfs_next_item(extent_root, &path);
11323                 if (ret)
11324                         break;
11325         }
11326
11327 out:
11328         btrfs_release_path(&path);
11329
11330         if (total != used) {
11331                 error(
11332                 "block group[%llu %llu] used %llu but extent items used %llu",
11333                         bg_key.objectid, bg_key.offset, used, total);
11334                 err |= ACCOUNTING_MISMATCH;
11335         }
11336         return err;
11337 }
11338
11339 /*
11340  * Check a chunk item.
11341  * Including checking all referred dev_extents and block group
11342  */
11343 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11344                             struct extent_buffer *eb, int slot)
11345 {
11346         struct btrfs_root *extent_root = fs_info->extent_root;
11347         struct btrfs_root *dev_root = fs_info->dev_root;
11348         struct btrfs_path path;
11349         struct btrfs_key chunk_key;
11350         struct btrfs_key bg_key;
11351         struct btrfs_key devext_key;
11352         struct btrfs_chunk *chunk;
11353         struct extent_buffer *leaf;
11354         struct btrfs_block_group_item *bi;
11355         struct btrfs_block_group_item bg_item;
11356         struct btrfs_dev_extent *ptr;
11357         u64 length;
11358         u64 chunk_end;
11359         u64 stripe_len;
11360         u64 type;
11361         int num_stripes;
11362         u64 offset;
11363         u64 objectid;
11364         int i;
11365         int ret;
11366         int err = 0;
11367
11368         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11369         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11370         length = btrfs_chunk_length(eb, chunk);
11371         chunk_end = chunk_key.offset + length;
11372         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11373                                       chunk_key.offset);
11374         if (ret < 0) {
11375                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11376                         chunk_end);
11377                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11378                 goto out;
11379         }
11380         type = btrfs_chunk_type(eb, chunk);
11381
11382         bg_key.objectid = chunk_key.offset;
11383         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11384         bg_key.offset = length;
11385
11386         btrfs_init_path(&path);
11387         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11388         if (ret) {
11389                 error(
11390                 "chunk[%llu %llu) did not find the related block group item",
11391                         chunk_key.offset, chunk_end);
11392                 err |= REFERENCER_MISSING;
11393         } else{
11394                 leaf = path.nodes[0];
11395                 bi = btrfs_item_ptr(leaf, path.slots[0],
11396                                     struct btrfs_block_group_item);
11397                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11398                                    sizeof(bg_item));
11399                 if (btrfs_block_group_flags(&bg_item) != type) {
11400                         error(
11401 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11402                                 chunk_key.offset, chunk_end, type,
11403                                 btrfs_block_group_flags(&bg_item));
11404                         err |= REFERENCER_MISSING;
11405                 }
11406         }
11407
11408         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11409         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11410         for (i = 0; i < num_stripes; i++) {
11411                 btrfs_release_path(&path);
11412                 btrfs_init_path(&path);
11413                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11414                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11415                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11416
11417                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11418                                         0, 0);
11419                 if (ret)
11420                         goto not_match_dev;
11421
11422                 leaf = path.nodes[0];
11423                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11424                                      struct btrfs_dev_extent);
11425                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11426                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11427                 if (objectid != chunk_key.objectid ||
11428                     offset != chunk_key.offset ||
11429                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11430                         goto not_match_dev;
11431                 continue;
11432 not_match_dev:
11433                 err |= BACKREF_MISSING;
11434                 error(
11435                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11436                         chunk_key.objectid, chunk_end, i);
11437                 continue;
11438         }
11439         btrfs_release_path(&path);
11440 out:
11441         return err;
11442 }
11443
11444 /*
11445  * Main entry function to check known items and update related accounting info
11446  */
11447 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11448 {
11449         struct btrfs_fs_info *fs_info = root->fs_info;
11450         struct btrfs_key key;
11451         int slot = 0;
11452         int type;
11453         struct btrfs_extent_data_ref *dref;
11454         int ret;
11455         int err = 0;
11456
11457 next:
11458         btrfs_item_key_to_cpu(eb, &key, slot);
11459         type = key.type;
11460
11461         switch (type) {
11462         case BTRFS_EXTENT_DATA_KEY:
11463                 ret = check_extent_data_item(root, eb, slot);
11464                 err |= ret;
11465                 break;
11466         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11467                 ret = check_block_group_item(fs_info, eb, slot);
11468                 err |= ret;
11469                 break;
11470         case BTRFS_DEV_ITEM_KEY:
11471                 ret = check_dev_item(fs_info, eb, slot);
11472                 err |= ret;
11473                 break;
11474         case BTRFS_CHUNK_ITEM_KEY:
11475                 ret = check_chunk_item(fs_info, eb, slot);
11476                 err |= ret;
11477                 break;
11478         case BTRFS_DEV_EXTENT_KEY:
11479                 ret = check_dev_extent_item(fs_info, eb, slot);
11480                 err |= ret;
11481                 break;
11482         case BTRFS_EXTENT_ITEM_KEY:
11483         case BTRFS_METADATA_ITEM_KEY:
11484                 ret = check_extent_item(fs_info, eb, slot);
11485                 err |= ret;
11486                 break;
11487         case BTRFS_EXTENT_CSUM_KEY:
11488                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11489                 break;
11490         case BTRFS_TREE_BLOCK_REF_KEY:
11491                 ret = check_tree_block_backref(fs_info, key.offset,
11492                                                key.objectid, -1);
11493                 err |= ret;
11494                 break;
11495         case BTRFS_EXTENT_DATA_REF_KEY:
11496                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11497                 ret = check_extent_data_backref(fs_info,
11498                                 btrfs_extent_data_ref_root(eb, dref),
11499                                 btrfs_extent_data_ref_objectid(eb, dref),
11500                                 btrfs_extent_data_ref_offset(eb, dref),
11501                                 key.objectid, 0,
11502                                 btrfs_extent_data_ref_count(eb, dref));
11503                 err |= ret;
11504                 break;
11505         case BTRFS_SHARED_BLOCK_REF_KEY:
11506                 ret = check_shared_block_backref(fs_info, key.offset,
11507                                                  key.objectid, -1);
11508                 err |= ret;
11509                 break;
11510         case BTRFS_SHARED_DATA_REF_KEY:
11511                 ret = check_shared_data_backref(fs_info, key.offset,
11512                                                 key.objectid);
11513                 err |= ret;
11514                 break;
11515         default:
11516                 break;
11517         }
11518
11519         if (++slot < btrfs_header_nritems(eb))
11520                 goto next;
11521
11522         return err;
11523 }
11524
11525 /*
11526  * Helper function for later fs/subvol tree check.  To determine if a tree
11527  * block should be checked.
11528  * This function will ensure only the direct referencer with lowest rootid to
11529  * check a fs/subvolume tree block.
11530  *
11531  * Backref check at extent tree would detect errors like missing subvolume
11532  * tree, so we can do aggressive check to reduce duplicated checks.
11533  */
11534 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11535 {
11536         struct btrfs_root *extent_root = root->fs_info->extent_root;
11537         struct btrfs_key key;
11538         struct btrfs_path path;
11539         struct extent_buffer *leaf;
11540         int slot;
11541         struct btrfs_extent_item *ei;
11542         unsigned long ptr;
11543         unsigned long end;
11544         int type;
11545         u32 item_size;
11546         u64 offset;
11547         struct btrfs_extent_inline_ref *iref;
11548         int ret;
11549
11550         btrfs_init_path(&path);
11551         key.objectid = btrfs_header_bytenr(eb);
11552         key.type = BTRFS_METADATA_ITEM_KEY;
11553         key.offset = (u64)-1;
11554
11555         /*
11556          * Any failure in backref resolving means we can't determine
11557          * whom the tree block belongs to.
11558          * So in that case, we need to check that tree block
11559          */
11560         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11561         if (ret < 0)
11562                 goto need_check;
11563
11564         ret = btrfs_previous_extent_item(extent_root, &path,
11565                                          btrfs_header_bytenr(eb));
11566         if (ret)
11567                 goto need_check;
11568
11569         leaf = path.nodes[0];
11570         slot = path.slots[0];
11571         btrfs_item_key_to_cpu(leaf, &key, slot);
11572         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11573
11574         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11575                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11576         } else {
11577                 struct btrfs_tree_block_info *info;
11578
11579                 info = (struct btrfs_tree_block_info *)(ei + 1);
11580                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11581         }
11582
11583         item_size = btrfs_item_size_nr(leaf, slot);
11584         ptr = (unsigned long)iref;
11585         end = (unsigned long)ei + item_size;
11586         while (ptr < end) {
11587                 iref = (struct btrfs_extent_inline_ref *)ptr;
11588                 type = btrfs_extent_inline_ref_type(leaf, iref);
11589                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11590
11591                 /*
11592                  * We only check the tree block if current root is
11593                  * the lowest referencer of it.
11594                  */
11595                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11596                     offset < root->objectid) {
11597                         btrfs_release_path(&path);
11598                         return 0;
11599                 }
11600
11601                 ptr += btrfs_extent_inline_ref_size(type);
11602         }
11603         /*
11604          * Normally we should also check keyed tree block ref, but that may be
11605          * very time consuming.  Inlined ref should already make us skip a lot
11606          * of refs now.  So skip search keyed tree block ref.
11607          */
11608
11609 need_check:
11610         btrfs_release_path(&path);
11611         return 1;
11612 }
11613
11614 /*
11615  * Traversal function for tree block. We will do:
11616  * 1) Skip shared fs/subvolume tree blocks
11617  * 2) Update related bytes accounting
11618  * 3) Pre-order traversal
11619  */
11620 static int traverse_tree_block(struct btrfs_root *root,
11621                                 struct extent_buffer *node)
11622 {
11623         struct extent_buffer *eb;
11624         struct btrfs_key key;
11625         struct btrfs_key drop_key;
11626         int level;
11627         u64 nr;
11628         int i;
11629         int err = 0;
11630         int ret;
11631
11632         /*
11633          * Skip shared fs/subvolume tree block, in that case they will
11634          * be checked by referencer with lowest rootid
11635          */
11636         if (is_fstree(root->objectid) && !should_check(root, node))
11637                 return 0;
11638
11639         /* Update bytes accounting */
11640         total_btree_bytes += node->len;
11641         if (fs_root_objectid(btrfs_header_owner(node)))
11642                 total_fs_tree_bytes += node->len;
11643         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11644                 total_extent_tree_bytes += node->len;
11645
11646         /* pre-order tranversal, check itself first */
11647         level = btrfs_header_level(node);
11648         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11649                                    btrfs_header_level(node),
11650                                    btrfs_header_owner(node));
11651         err |= ret;
11652         if (err)
11653                 error(
11654         "check %s failed root %llu bytenr %llu level %d, force continue check",
11655                         level ? "node":"leaf", root->objectid,
11656                         btrfs_header_bytenr(node), btrfs_header_level(node));
11657
11658         if (!level) {
11659                 btree_space_waste += btrfs_leaf_free_space(root, node);
11660                 ret = check_leaf_items(root, node);
11661                 err |= ret;
11662                 return err;
11663         }
11664
11665         nr = btrfs_header_nritems(node);
11666         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11667         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11668                 sizeof(struct btrfs_key_ptr);
11669
11670         /* Then check all its children */
11671         for (i = 0; i < nr; i++) {
11672                 u64 blocknr = btrfs_node_blockptr(node, i);
11673
11674                 btrfs_node_key_to_cpu(node, &key, i);
11675                 if (level == root->root_item.drop_level &&
11676                     is_dropped_key(&key, &drop_key))
11677                         continue;
11678
11679                 /*
11680                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11681                  * to call the function itself.
11682                  */
11683                 eb = read_tree_block(root->fs_info, blocknr, 0);
11684                 if (extent_buffer_uptodate(eb)) {
11685                         ret = traverse_tree_block(root, eb);
11686                         err |= ret;
11687                 }
11688                 free_extent_buffer(eb);
11689         }
11690
11691         return err;
11692 }
11693
11694 /*
11695  * Low memory usage version check_chunks_and_extents.
11696  */
11697 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11698 {
11699         struct btrfs_path path;
11700         struct btrfs_key key;
11701         struct btrfs_root *root1;
11702         struct btrfs_root *root;
11703         struct btrfs_root *cur_root;
11704         int err = 0;
11705         int ret;
11706
11707         root = fs_info->fs_root;
11708
11709         root1 = root->fs_info->chunk_root;
11710         ret = traverse_tree_block(root1, root1->node);
11711         err |= ret;
11712
11713         root1 = root->fs_info->tree_root;
11714         ret = traverse_tree_block(root1, root1->node);
11715         err |= ret;
11716
11717         btrfs_init_path(&path);
11718         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11719         key.offset = 0;
11720         key.type = BTRFS_ROOT_ITEM_KEY;
11721
11722         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11723         if (ret) {
11724                 error("cannot find extent treet in tree_root");
11725                 goto out;
11726         }
11727
11728         while (1) {
11729                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11730                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11731                         goto next;
11732                 key.offset = (u64)-1;
11733
11734                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11735                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11736                                         &key);
11737                 else
11738                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11739                 if (IS_ERR(cur_root) || !cur_root) {
11740                         error("failed to read tree: %lld", key.objectid);
11741                         goto next;
11742                 }
11743
11744                 ret = traverse_tree_block(cur_root, cur_root->node);
11745                 err |= ret;
11746
11747                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11748                         btrfs_free_fs_root(cur_root);
11749 next:
11750                 ret = btrfs_next_item(root1, &path);
11751                 if (ret)
11752                         goto out;
11753         }
11754
11755 out:
11756         btrfs_release_path(&path);
11757         return err;
11758 }
11759
11760 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11761 {
11762         int ret;
11763
11764         if (!ctx.progress_enabled)
11765                 fprintf(stderr, "checking extents\n");
11766         if (check_mode == CHECK_MODE_LOWMEM)
11767                 ret = check_chunks_and_extents_v2(fs_info);
11768         else
11769                 ret = check_chunks_and_extents(fs_info);
11770
11771         return ret;
11772 }
11773
11774 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11775                            struct btrfs_root *root, int overwrite)
11776 {
11777         struct extent_buffer *c;
11778         struct extent_buffer *old = root->node;
11779         int level;
11780         int ret;
11781         struct btrfs_disk_key disk_key = {0,0,0};
11782
11783         level = 0;
11784
11785         if (overwrite) {
11786                 c = old;
11787                 extent_buffer_get(c);
11788                 goto init;
11789         }
11790         c = btrfs_alloc_free_block(trans, root,
11791                                    root->fs_info->nodesize,
11792                                    root->root_key.objectid,
11793                                    &disk_key, level, 0, 0);
11794         if (IS_ERR(c)) {
11795                 c = old;
11796                 extent_buffer_get(c);
11797                 overwrite = 1;
11798         }
11799 init:
11800         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11801         btrfs_set_header_level(c, level);
11802         btrfs_set_header_bytenr(c, c->start);
11803         btrfs_set_header_generation(c, trans->transid);
11804         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11805         btrfs_set_header_owner(c, root->root_key.objectid);
11806
11807         write_extent_buffer(c, root->fs_info->fsid,
11808                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11809
11810         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11811                             btrfs_header_chunk_tree_uuid(c),
11812                             BTRFS_UUID_SIZE);
11813
11814         btrfs_mark_buffer_dirty(c);
11815         /*
11816          * this case can happen in the following case:
11817          *
11818          * 1.overwrite previous root.
11819          *
11820          * 2.reinit reloc data root, this is because we skip pin
11821          * down reloc data tree before which means we can allocate
11822          * same block bytenr here.
11823          */
11824         if (old->start == c->start) {
11825                 btrfs_set_root_generation(&root->root_item,
11826                                           trans->transid);
11827                 root->root_item.level = btrfs_header_level(root->node);
11828                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11829                                         &root->root_key, &root->root_item);
11830                 if (ret) {
11831                         free_extent_buffer(c);
11832                         return ret;
11833                 }
11834         }
11835         free_extent_buffer(old);
11836         root->node = c;
11837         add_root_to_dirty_list(root);
11838         return 0;
11839 }
11840
11841 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11842                                 struct extent_buffer *eb, int tree_root)
11843 {
11844         struct extent_buffer *tmp;
11845         struct btrfs_root_item *ri;
11846         struct btrfs_key key;
11847         u64 bytenr;
11848         int level = btrfs_header_level(eb);
11849         int nritems;
11850         int ret;
11851         int i;
11852
11853         /*
11854          * If we have pinned this block before, don't pin it again.
11855          * This can not only avoid forever loop with broken filesystem
11856          * but also give us some speedups.
11857          */
11858         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11859                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11860                 return 0;
11861
11862         btrfs_pin_extent(fs_info, eb->start, eb->len);
11863
11864         nritems = btrfs_header_nritems(eb);
11865         for (i = 0; i < nritems; i++) {
11866                 if (level == 0) {
11867                         btrfs_item_key_to_cpu(eb, &key, i);
11868                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11869                                 continue;
11870                         /* Skip the extent root and reloc roots */
11871                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11872                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11873                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11874                                 continue;
11875                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11876                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11877
11878                         /*
11879                          * If at any point we start needing the real root we
11880                          * will have to build a stump root for the root we are
11881                          * in, but for now this doesn't actually use the root so
11882                          * just pass in extent_root.
11883                          */
11884                         tmp = read_tree_block(fs_info, bytenr, 0);
11885                         if (!extent_buffer_uptodate(tmp)) {
11886                                 fprintf(stderr, "Error reading root block\n");
11887                                 return -EIO;
11888                         }
11889                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11890                         free_extent_buffer(tmp);
11891                         if (ret)
11892                                 return ret;
11893                 } else {
11894                         bytenr = btrfs_node_blockptr(eb, i);
11895
11896                         /* If we aren't the tree root don't read the block */
11897                         if (level == 1 && !tree_root) {
11898                                 btrfs_pin_extent(fs_info, bytenr,
11899                                                 fs_info->nodesize);
11900                                 continue;
11901                         }
11902
11903                         tmp = read_tree_block(fs_info, bytenr, 0);
11904                         if (!extent_buffer_uptodate(tmp)) {
11905                                 fprintf(stderr, "Error reading tree block\n");
11906                                 return -EIO;
11907                         }
11908                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11909                         free_extent_buffer(tmp);
11910                         if (ret)
11911                                 return ret;
11912                 }
11913         }
11914
11915         return 0;
11916 }
11917
11918 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11919 {
11920         int ret;
11921
11922         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11923         if (ret)
11924                 return ret;
11925
11926         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11927 }
11928
11929 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11930 {
11931         struct btrfs_block_group_cache *cache;
11932         struct btrfs_path path;
11933         struct extent_buffer *leaf;
11934         struct btrfs_chunk *chunk;
11935         struct btrfs_key key;
11936         int ret;
11937         u64 start;
11938
11939         btrfs_init_path(&path);
11940         key.objectid = 0;
11941         key.type = BTRFS_CHUNK_ITEM_KEY;
11942         key.offset = 0;
11943         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11944         if (ret < 0) {
11945                 btrfs_release_path(&path);
11946                 return ret;
11947         }
11948
11949         /*
11950          * We do this in case the block groups were screwed up and had alloc
11951          * bits that aren't actually set on the chunks.  This happens with
11952          * restored images every time and could happen in real life I guess.
11953          */
11954         fs_info->avail_data_alloc_bits = 0;
11955         fs_info->avail_metadata_alloc_bits = 0;
11956         fs_info->avail_system_alloc_bits = 0;
11957
11958         /* First we need to create the in-memory block groups */
11959         while (1) {
11960                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11961                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11962                         if (ret < 0) {
11963                                 btrfs_release_path(&path);
11964                                 return ret;
11965                         }
11966                         if (ret) {
11967                                 ret = 0;
11968                                 break;
11969                         }
11970                 }
11971                 leaf = path.nodes[0];
11972                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11973                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11974                         path.slots[0]++;
11975                         continue;
11976                 }
11977
11978                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11979                 btrfs_add_block_group(fs_info, 0,
11980                                       btrfs_chunk_type(leaf, chunk),
11981                                       key.objectid, key.offset,
11982                                       btrfs_chunk_length(leaf, chunk));
11983                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11984                                  key.offset + btrfs_chunk_length(leaf, chunk));
11985                 path.slots[0]++;
11986         }
11987         start = 0;
11988         while (1) {
11989                 cache = btrfs_lookup_first_block_group(fs_info, start);
11990                 if (!cache)
11991                         break;
11992                 cache->cached = 1;
11993                 start = cache->key.objectid + cache->key.offset;
11994         }
11995
11996         btrfs_release_path(&path);
11997         return 0;
11998 }
11999
12000 static int reset_balance(struct btrfs_trans_handle *trans,
12001                          struct btrfs_fs_info *fs_info)
12002 {
12003         struct btrfs_root *root = fs_info->tree_root;
12004         struct btrfs_path path;
12005         struct extent_buffer *leaf;
12006         struct btrfs_key key;
12007         int del_slot, del_nr = 0;
12008         int ret;
12009         int found = 0;
12010
12011         btrfs_init_path(&path);
12012         key.objectid = BTRFS_BALANCE_OBJECTID;
12013         key.type = BTRFS_BALANCE_ITEM_KEY;
12014         key.offset = 0;
12015         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12016         if (ret) {
12017                 if (ret > 0)
12018                         ret = 0;
12019                 if (!ret)
12020                         goto reinit_data_reloc;
12021                 else
12022                         goto out;
12023         }
12024
12025         ret = btrfs_del_item(trans, root, &path);
12026         if (ret)
12027                 goto out;
12028         btrfs_release_path(&path);
12029
12030         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12031         key.type = BTRFS_ROOT_ITEM_KEY;
12032         key.offset = 0;
12033         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12034         if (ret < 0)
12035                 goto out;
12036         while (1) {
12037                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12038                         if (!found)
12039                                 break;
12040
12041                         if (del_nr) {
12042                                 ret = btrfs_del_items(trans, root, &path,
12043                                                       del_slot, del_nr);
12044                                 del_nr = 0;
12045                                 if (ret)
12046                                         goto out;
12047                         }
12048                         key.offset++;
12049                         btrfs_release_path(&path);
12050
12051                         found = 0;
12052                         ret = btrfs_search_slot(trans, root, &key, &path,
12053                                                 -1, 1);
12054                         if (ret < 0)
12055                                 goto out;
12056                         continue;
12057                 }
12058                 found = 1;
12059                 leaf = path.nodes[0];
12060                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12061                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12062                         break;
12063                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12064                         path.slots[0]++;
12065                         continue;
12066                 }
12067                 if (!del_nr) {
12068                         del_slot = path.slots[0];
12069                         del_nr = 1;
12070                 } else {
12071                         del_nr++;
12072                 }
12073                 path.slots[0]++;
12074         }
12075
12076         if (del_nr) {
12077                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12078                 if (ret)
12079                         goto out;
12080         }
12081         btrfs_release_path(&path);
12082
12083 reinit_data_reloc:
12084         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12085         key.type = BTRFS_ROOT_ITEM_KEY;
12086         key.offset = (u64)-1;
12087         root = btrfs_read_fs_root(fs_info, &key);
12088         if (IS_ERR(root)) {
12089                 fprintf(stderr, "Error reading data reloc tree\n");
12090                 ret = PTR_ERR(root);
12091                 goto out;
12092         }
12093         record_root_in_trans(trans, root);
12094         ret = btrfs_fsck_reinit_root(trans, root, 0);
12095         if (ret)
12096                 goto out;
12097         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12098 out:
12099         btrfs_release_path(&path);
12100         return ret;
12101 }
12102
12103 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12104                               struct btrfs_fs_info *fs_info)
12105 {
12106         u64 start = 0;
12107         int ret;
12108
12109         /*
12110          * The only reason we don't do this is because right now we're just
12111          * walking the trees we find and pinning down their bytes, we don't look
12112          * at any of the leaves.  In order to do mixed groups we'd have to check
12113          * the leaves of any fs roots and pin down the bytes for any file
12114          * extents we find.  Not hard but why do it if we don't have to?
12115          */
12116         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12117                 fprintf(stderr, "We don't support re-initing the extent tree "
12118                         "for mixed block groups yet, please notify a btrfs "
12119                         "developer you want to do this so they can add this "
12120                         "functionality.\n");
12121                 return -EINVAL;
12122         }
12123
12124         /*
12125          * first we need to walk all of the trees except the extent tree and pin
12126          * down the bytes that are in use so we don't overwrite any existing
12127          * metadata.
12128          */
12129         ret = pin_metadata_blocks(fs_info);
12130         if (ret) {
12131                 fprintf(stderr, "error pinning down used bytes\n");
12132                 return ret;
12133         }
12134
12135         /*
12136          * Need to drop all the block groups since we're going to recreate all
12137          * of them again.
12138          */
12139         btrfs_free_block_groups(fs_info);
12140         ret = reset_block_groups(fs_info);
12141         if (ret) {
12142                 fprintf(stderr, "error resetting the block groups\n");
12143                 return ret;
12144         }
12145
12146         /* Ok we can allocate now, reinit the extent root */
12147         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12148         if (ret) {
12149                 fprintf(stderr, "extent root initialization failed\n");
12150                 /*
12151                  * When the transaction code is updated we should end the
12152                  * transaction, but for now progs only knows about commit so
12153                  * just return an error.
12154                  */
12155                 return ret;
12156         }
12157
12158         /*
12159          * Now we have all the in-memory block groups setup so we can make
12160          * allocations properly, and the metadata we care about is safe since we
12161          * pinned all of it above.
12162          */
12163         while (1) {
12164                 struct btrfs_block_group_cache *cache;
12165
12166                 cache = btrfs_lookup_first_block_group(fs_info, start);
12167                 if (!cache)
12168                         break;
12169                 start = cache->key.objectid + cache->key.offset;
12170                 ret = btrfs_insert_item(trans, fs_info->extent_root,
12171                                         &cache->key, &cache->item,
12172                                         sizeof(cache->item));
12173                 if (ret) {
12174                         fprintf(stderr, "Error adding block group\n");
12175                         return ret;
12176                 }
12177                 btrfs_extent_post_op(trans, fs_info->extent_root);
12178         }
12179
12180         ret = reset_balance(trans, fs_info);
12181         if (ret)
12182                 fprintf(stderr, "error resetting the pending balance\n");
12183
12184         return ret;
12185 }
12186
12187 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12188 {
12189         struct btrfs_path path;
12190         struct btrfs_trans_handle *trans;
12191         struct btrfs_key key;
12192         int ret;
12193
12194         printf("Recowing metadata block %llu\n", eb->start);
12195         key.objectid = btrfs_header_owner(eb);
12196         key.type = BTRFS_ROOT_ITEM_KEY;
12197         key.offset = (u64)-1;
12198
12199         root = btrfs_read_fs_root(root->fs_info, &key);
12200         if (IS_ERR(root)) {
12201                 fprintf(stderr, "Couldn't find owner root %llu\n",
12202                         key.objectid);
12203                 return PTR_ERR(root);
12204         }
12205
12206         trans = btrfs_start_transaction(root, 1);
12207         if (IS_ERR(trans))
12208                 return PTR_ERR(trans);
12209
12210         btrfs_init_path(&path);
12211         path.lowest_level = btrfs_header_level(eb);
12212         if (path.lowest_level)
12213                 btrfs_node_key_to_cpu(eb, &key, 0);
12214         else
12215                 btrfs_item_key_to_cpu(eb, &key, 0);
12216
12217         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12218         btrfs_commit_transaction(trans, root);
12219         btrfs_release_path(&path);
12220         return ret;
12221 }
12222
12223 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12224 {
12225         struct btrfs_path path;
12226         struct btrfs_trans_handle *trans;
12227         struct btrfs_key key;
12228         int ret;
12229
12230         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12231                bad->key.type, bad->key.offset);
12232         key.objectid = bad->root_id;
12233         key.type = BTRFS_ROOT_ITEM_KEY;
12234         key.offset = (u64)-1;
12235
12236         root = btrfs_read_fs_root(root->fs_info, &key);
12237         if (IS_ERR(root)) {
12238                 fprintf(stderr, "Couldn't find owner root %llu\n",
12239                         key.objectid);
12240                 return PTR_ERR(root);
12241         }
12242
12243         trans = btrfs_start_transaction(root, 1);
12244         if (IS_ERR(trans))
12245                 return PTR_ERR(trans);
12246
12247         btrfs_init_path(&path);
12248         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12249         if (ret) {
12250                 if (ret > 0)
12251                         ret = 0;
12252                 goto out;
12253         }
12254         ret = btrfs_del_item(trans, root, &path);
12255 out:
12256         btrfs_commit_transaction(trans, root);
12257         btrfs_release_path(&path);
12258         return ret;
12259 }
12260
12261 static int zero_log_tree(struct btrfs_root *root)
12262 {
12263         struct btrfs_trans_handle *trans;
12264         int ret;
12265
12266         trans = btrfs_start_transaction(root, 1);
12267         if (IS_ERR(trans)) {
12268                 ret = PTR_ERR(trans);
12269                 return ret;
12270         }
12271         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12272         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12273         ret = btrfs_commit_transaction(trans, root);
12274         return ret;
12275 }
12276
12277 static int populate_csum(struct btrfs_trans_handle *trans,
12278                          struct btrfs_root *csum_root, char *buf, u64 start,
12279                          u64 len)
12280 {
12281         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12282         u64 offset = 0;
12283         u64 sectorsize;
12284         int ret = 0;
12285
12286         while (offset < len) {
12287                 sectorsize = fs_info->sectorsize;
12288                 ret = read_extent_data(fs_info, buf, start + offset,
12289                                        &sectorsize, 0);
12290                 if (ret)
12291                         break;
12292                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12293                                             start + offset, buf, sectorsize);
12294                 if (ret)
12295                         break;
12296                 offset += sectorsize;
12297         }
12298         return ret;
12299 }
12300
12301 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12302                                       struct btrfs_root *csum_root,
12303                                       struct btrfs_root *cur_root)
12304 {
12305         struct btrfs_path path;
12306         struct btrfs_key key;
12307         struct extent_buffer *node;
12308         struct btrfs_file_extent_item *fi;
12309         char *buf = NULL;
12310         u64 start = 0;
12311         u64 len = 0;
12312         int slot = 0;
12313         int ret = 0;
12314
12315         buf = malloc(cur_root->fs_info->sectorsize);
12316         if (!buf)
12317                 return -ENOMEM;
12318
12319         btrfs_init_path(&path);
12320         key.objectid = 0;
12321         key.offset = 0;
12322         key.type = 0;
12323         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12324         if (ret < 0)
12325                 goto out;
12326         /* Iterate all regular file extents and fill its csum */
12327         while (1) {
12328                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12329
12330                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12331                         goto next;
12332                 node = path.nodes[0];
12333                 slot = path.slots[0];
12334                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12335                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12336                         goto next;
12337                 start = btrfs_file_extent_disk_bytenr(node, fi);
12338                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12339
12340                 ret = populate_csum(trans, csum_root, buf, start, len);
12341                 if (ret == -EEXIST)
12342                         ret = 0;
12343                 if (ret < 0)
12344                         goto out;
12345 next:
12346                 /*
12347                  * TODO: if next leaf is corrupted, jump to nearest next valid
12348                  * leaf.
12349                  */
12350                 ret = btrfs_next_item(cur_root, &path);
12351                 if (ret < 0)
12352                         goto out;
12353                 if (ret > 0) {
12354                         ret = 0;
12355                         goto out;
12356                 }
12357         }
12358
12359 out:
12360         btrfs_release_path(&path);
12361         free(buf);
12362         return ret;
12363 }
12364
12365 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12366                                   struct btrfs_root *csum_root)
12367 {
12368         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12369         struct btrfs_path path;
12370         struct btrfs_root *tree_root = fs_info->tree_root;
12371         struct btrfs_root *cur_root;
12372         struct extent_buffer *node;
12373         struct btrfs_key key;
12374         int slot = 0;
12375         int ret = 0;
12376
12377         btrfs_init_path(&path);
12378         key.objectid = BTRFS_FS_TREE_OBJECTID;
12379         key.offset = 0;
12380         key.type = BTRFS_ROOT_ITEM_KEY;
12381         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12382         if (ret < 0)
12383                 goto out;
12384         if (ret > 0) {
12385                 ret = -ENOENT;
12386                 goto out;
12387         }
12388
12389         while (1) {
12390                 node = path.nodes[0];
12391                 slot = path.slots[0];
12392                 btrfs_item_key_to_cpu(node, &key, slot);
12393                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12394                         goto out;
12395                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12396                         goto next;
12397                 if (!is_fstree(key.objectid))
12398                         goto next;
12399                 key.offset = (u64)-1;
12400
12401                 cur_root = btrfs_read_fs_root(fs_info, &key);
12402                 if (IS_ERR(cur_root) || !cur_root) {
12403                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12404                                 key.objectid);
12405                         goto out;
12406                 }
12407                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12408                                 cur_root);
12409                 if (ret < 0)
12410                         goto out;
12411 next:
12412                 ret = btrfs_next_item(tree_root, &path);
12413                 if (ret > 0) {
12414                         ret = 0;
12415                         goto out;
12416                 }
12417                 if (ret < 0)
12418                         goto out;
12419         }
12420
12421 out:
12422         btrfs_release_path(&path);
12423         return ret;
12424 }
12425
12426 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12427                                       struct btrfs_root *csum_root)
12428 {
12429         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12430         struct btrfs_path path;
12431         struct btrfs_extent_item *ei;
12432         struct extent_buffer *leaf;
12433         char *buf;
12434         struct btrfs_key key;
12435         int ret;
12436
12437         btrfs_init_path(&path);
12438         key.objectid = 0;
12439         key.type = BTRFS_EXTENT_ITEM_KEY;
12440         key.offset = 0;
12441         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12442         if (ret < 0) {
12443                 btrfs_release_path(&path);
12444                 return ret;
12445         }
12446
12447         buf = malloc(csum_root->fs_info->sectorsize);
12448         if (!buf) {
12449                 btrfs_release_path(&path);
12450                 return -ENOMEM;
12451         }
12452
12453         while (1) {
12454                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12455                         ret = btrfs_next_leaf(extent_root, &path);
12456                         if (ret < 0)
12457                                 break;
12458                         if (ret) {
12459                                 ret = 0;
12460                                 break;
12461                         }
12462                 }
12463                 leaf = path.nodes[0];
12464
12465                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12466                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12467                         path.slots[0]++;
12468                         continue;
12469                 }
12470
12471                 ei = btrfs_item_ptr(leaf, path.slots[0],
12472                                     struct btrfs_extent_item);
12473                 if (!(btrfs_extent_flags(leaf, ei) &
12474                       BTRFS_EXTENT_FLAG_DATA)) {
12475                         path.slots[0]++;
12476                         continue;
12477                 }
12478
12479                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12480                                     key.offset);
12481                 if (ret)
12482                         break;
12483                 path.slots[0]++;
12484         }
12485
12486         btrfs_release_path(&path);
12487         free(buf);
12488         return ret;
12489 }
12490
/*
 * Recompute the checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes all extent information, so in that case
 * the extent tree cannot be used as the source.  When search_fs_tree is
 * non-zero the fs/subvolume trees are walked instead; otherwise the extent
 * tree is.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
12507
12508 static void free_roots_info_cache(void)
12509 {
12510         if (!roots_info_cache)
12511                 return;
12512
12513         while (!cache_tree_empty(roots_info_cache)) {
12514                 struct cache_extent *entry;
12515                 struct root_item_info *rii;
12516
12517                 entry = first_cache_extent(roots_info_cache);
12518                 if (!entry)
12519                         break;
12520                 remove_cache_extent(roots_info_cache, entry);
12521                 rii = container_of(entry, struct root_item_info, cache_extent);
12522                 free(rii);
12523         }
12524
12525         free(roots_info_cache);
12526         roots_info_cache = NULL;
12527 }
12528
12529 static int build_roots_info_cache(struct btrfs_fs_info *info)
12530 {
12531         int ret = 0;
12532         struct btrfs_key key;
12533         struct extent_buffer *leaf;
12534         struct btrfs_path path;
12535
12536         if (!roots_info_cache) {
12537                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12538                 if (!roots_info_cache)
12539                         return -ENOMEM;
12540                 cache_tree_init(roots_info_cache);
12541         }
12542
12543         btrfs_init_path(&path);
12544         key.objectid = 0;
12545         key.type = BTRFS_EXTENT_ITEM_KEY;
12546         key.offset = 0;
12547         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12548         if (ret < 0)
12549                 goto out;
12550         leaf = path.nodes[0];
12551
12552         while (1) {
12553                 struct btrfs_key found_key;
12554                 struct btrfs_extent_item *ei;
12555                 struct btrfs_extent_inline_ref *iref;
12556                 int slot = path.slots[0];
12557                 int type;
12558                 u64 flags;
12559                 u64 root_id;
12560                 u8 level;
12561                 struct cache_extent *entry;
12562                 struct root_item_info *rii;
12563
12564                 if (slot >= btrfs_header_nritems(leaf)) {
12565                         ret = btrfs_next_leaf(info->extent_root, &path);
12566                         if (ret < 0) {
12567                                 break;
12568                         } else if (ret) {
12569                                 ret = 0;
12570                                 break;
12571                         }
12572                         leaf = path.nodes[0];
12573                         slot = path.slots[0];
12574                 }
12575
12576                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12577
12578                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12579                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12580                         goto next;
12581
12582                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12583                 flags = btrfs_extent_flags(leaf, ei);
12584
12585                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12586                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12587                         goto next;
12588
12589                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12590                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12591                         level = found_key.offset;
12592                 } else {
12593                         struct btrfs_tree_block_info *binfo;
12594
12595                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12596                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12597                         level = btrfs_tree_block_level(leaf, binfo);
12598                 }
12599
12600                 /*
12601                  * For a root extent, it must be of the following type and the
12602                  * first (and only one) iref in the item.
12603                  */
12604                 type = btrfs_extent_inline_ref_type(leaf, iref);
12605                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12606                         goto next;
12607
12608                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12609                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12610                 if (!entry) {
12611                         rii = malloc(sizeof(struct root_item_info));
12612                         if (!rii) {
12613                                 ret = -ENOMEM;
12614                                 goto out;
12615                         }
12616                         rii->cache_extent.start = root_id;
12617                         rii->cache_extent.size = 1;
12618                         rii->level = (u8)-1;
12619                         entry = &rii->cache_extent;
12620                         ret = insert_cache_extent(roots_info_cache, entry);
12621                         ASSERT(ret == 0);
12622                 } else {
12623                         rii = container_of(entry, struct root_item_info,
12624                                            cache_extent);
12625                 }
12626
12627                 ASSERT(rii->cache_extent.start == root_id);
12628                 ASSERT(rii->cache_extent.size == 1);
12629
12630                 if (level > rii->level || rii->level == (u8)-1) {
12631                         rii->level = level;
12632                         rii->bytenr = found_key.objectid;
12633                         rii->gen = btrfs_extent_generation(leaf, ei);
12634                         rii->node_count = 1;
12635                 } else if (level == rii->level) {
12636                         rii->node_count++;
12637                 }
12638 next:
12639                 path.slots[0]++;
12640         }
12641
12642 out:
12643         btrfs_release_path(&path);
12644
12645         return ret;
12646 }
12647
12648 static int maybe_repair_root_item(struct btrfs_path *path,
12649                                   const struct btrfs_key *root_key,
12650                                   const int read_only_mode)
12651 {
12652         const u64 root_id = root_key->objectid;
12653         struct cache_extent *entry;
12654         struct root_item_info *rii;
12655         struct btrfs_root_item ri;
12656         unsigned long offset;
12657
12658         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12659         if (!entry) {
12660                 fprintf(stderr,
12661                         "Error: could not find extent items for root %llu\n",
12662                         root_key->objectid);
12663                 return -ENOENT;
12664         }
12665
12666         rii = container_of(entry, struct root_item_info, cache_extent);
12667         ASSERT(rii->cache_extent.start == root_id);
12668         ASSERT(rii->cache_extent.size == 1);
12669
12670         if (rii->node_count != 1) {
12671                 fprintf(stderr,
12672                         "Error: could not find btree root extent for root %llu\n",
12673                         root_id);
12674                 return -ENOENT;
12675         }
12676
12677         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12678         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12679
12680         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12681             btrfs_root_level(&ri) != rii->level ||
12682             btrfs_root_generation(&ri) != rii->gen) {
12683
12684                 /*
12685                  * If we're in repair mode but our caller told us to not update
12686                  * the root item, i.e. just check if it needs to be updated, don't
12687                  * print this message, since the caller will call us again shortly
12688                  * for the same root item without read only mode (the caller will
12689                  * open a transaction first).
12690                  */
12691                 if (!(read_only_mode && repair))
12692                         fprintf(stderr,
12693                                 "%sroot item for root %llu,"
12694                                 " current bytenr %llu, current gen %llu, current level %u,"
12695                                 " new bytenr %llu, new gen %llu, new level %u\n",
12696                                 (read_only_mode ? "" : "fixing "),
12697                                 root_id,
12698                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12699                                 btrfs_root_level(&ri),
12700                                 rii->bytenr, rii->gen, rii->level);
12701
12702                 if (btrfs_root_generation(&ri) > rii->gen) {
12703                         fprintf(stderr,
12704                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12705                                 root_id, btrfs_root_generation(&ri), rii->gen);
12706                         return -EINVAL;
12707                 }
12708
12709                 if (!read_only_mode) {
12710                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12711                         btrfs_set_root_level(&ri, rii->level);
12712                         btrfs_set_root_generation(&ri, rii->gen);
12713                         write_extent_buffer(path->nodes[0], &ri,
12714                                             offset, sizeof(ri));
12715                 }
12716
12717                 return 1;
12718         }
12719
12720         return 0;
12721 }
12722
12723 /*
12724  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12725  * caused read-only snapshots to be corrupted if they were created at a moment
12726  * when the source subvolume/snapshot had orphan items. The issue was that the
12727  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12728  * node instead of the post orphan cleanup root node.
12729  * So this function, and its callees, just detects and fixes those cases. Even
12730  * though the regression was for read-only snapshots, this function applies to
12731  * any snapshot/subvolume root.
12732  * This must be run before any other repair code - not doing it so, makes other
12733  * repair code delete or modify backrefs in the extent tree for example, which
12734  * will result in an inconsistent fs after repairing the root items.
12735  */
12736 static int repair_root_items(struct btrfs_fs_info *info)
12737 {
12738         struct btrfs_path path;
12739         struct btrfs_key key;
12740         struct extent_buffer *leaf;
12741         struct btrfs_trans_handle *trans = NULL;
12742         int ret = 0;
12743         int bad_roots = 0;
12744         int need_trans = 0;
12745
12746         btrfs_init_path(&path);
12747
12748         ret = build_roots_info_cache(info);
12749         if (ret)
12750                 goto out;
12751
12752         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12753         key.type = BTRFS_ROOT_ITEM_KEY;
12754         key.offset = 0;
12755
12756 again:
12757         /*
12758          * Avoid opening and committing transactions if a leaf doesn't have
12759          * any root items that need to be fixed, so that we avoid rotating
12760          * backup roots unnecessarily.
12761          */
12762         if (need_trans) {
12763                 trans = btrfs_start_transaction(info->tree_root, 1);
12764                 if (IS_ERR(trans)) {
12765                         ret = PTR_ERR(trans);
12766                         goto out;
12767                 }
12768         }
12769
12770         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12771                                 0, trans ? 1 : 0);
12772         if (ret < 0)
12773                 goto out;
12774         leaf = path.nodes[0];
12775
12776         while (1) {
12777                 struct btrfs_key found_key;
12778
12779                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12780                         int no_more_keys = find_next_key(&path, &key);
12781
12782                         btrfs_release_path(&path);
12783                         if (trans) {
12784                                 ret = btrfs_commit_transaction(trans,
12785                                                                info->tree_root);
12786                                 trans = NULL;
12787                                 if (ret < 0)
12788                                         goto out;
12789                         }
12790                         need_trans = 0;
12791                         if (no_more_keys)
12792                                 break;
12793                         goto again;
12794                 }
12795
12796                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12797
12798                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12799                         goto next;
12800                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12801                         goto next;
12802
12803                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12804                 if (ret < 0)
12805                         goto out;
12806                 if (ret) {
12807                         if (!trans && repair) {
12808                                 need_trans = 1;
12809                                 key = found_key;
12810                                 btrfs_release_path(&path);
12811                                 goto again;
12812                         }
12813                         bad_roots++;
12814                 }
12815 next:
12816                 path.slots[0]++;
12817         }
12818         ret = 0;
12819 out:
12820         free_roots_info_cache();
12821         btrfs_release_path(&path);
12822         if (trans)
12823                 btrfs_commit_transaction(trans, info->tree_root);
12824         if (ret < 0)
12825                 return ret;
12826
12827         return bad_roots;
12828 }
12829
12830 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12831 {
12832         struct btrfs_trans_handle *trans;
12833         struct btrfs_block_group_cache *bg_cache;
12834         u64 current = 0;
12835         int ret = 0;
12836
12837         /* Clear all free space cache inodes and its extent data */
12838         while (1) {
12839                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12840                 if (!bg_cache)
12841                         break;
12842                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12843                 if (ret < 0)
12844                         return ret;
12845                 current = bg_cache->key.objectid + bg_cache->key.offset;
12846         }
12847
12848         /* Don't forget to set cache_generation to -1 */
12849         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12850         if (IS_ERR(trans)) {
12851                 error("failed to update super block cache generation");
12852                 return PTR_ERR(trans);
12853         }
12854         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12855         btrfs_commit_transaction(trans, fs_info->tree_root);
12856
12857         return ret;
12858 }
12859
12860 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
12861                 int clear_version)
12862 {
12863         int ret = 0;
12864
12865         if (clear_version == 1) {
12866                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12867                         error(
12868                 "free space cache v2 detected, use --clear-space-cache v2");
12869                         ret = 1;
12870                         goto close_out;
12871                 }
12872                 printf("Clearing free space cache\n");
12873                 ret = clear_free_space_cache(fs_info);
12874                 if (ret) {
12875                         error("failed to clear free space cache");
12876                         ret = 1;
12877                 } else {
12878                         printf("Free space cache cleared\n");
12879                 }
12880         } else if (clear_version == 2) {
12881                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12882                         printf("no free space cache v2 to clear\n");
12883                         ret = 0;
12884                         goto close_out;
12885                 }
12886                 printf("Clear free space cache v2\n");
12887                 ret = btrfs_clear_free_space_tree(fs_info);
12888                 if (ret) {
12889                         error("failed to clear free space cache v2: %d", ret);
12890                         ret = 1;
12891                 } else {
12892                         printf("free space cache v2 cleared\n");
12893                 }
12894         }
12895 close_out:
12896         return ret;
12897 }
12898
/*
 * Help text for 'btrfs check', as a NULL-terminated array of lines: the
 * synopsis, a one-line summary, a longer description, an empty separator
 * line and then one or more lines per option.
 */
const char * const cmd_check_usage[] = {
	/* synopsis and one-line summary */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",

	/* long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",

	/* separator between description and option list */
	"",

	/* options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--force                     skip mount checks, repair is not possible",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",

	/* terminator */
	NULL
};
12930
12931 int cmd_check(int argc, char **argv)
12932 {
12933         struct cache_tree root_cache;
12934         struct btrfs_root *root;
12935         struct btrfs_fs_info *info;
12936         u64 bytenr = 0;
12937         u64 subvolid = 0;
12938         u64 tree_root_bytenr = 0;
12939         u64 chunk_root_bytenr = 0;
12940         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12941         int ret = 0;
12942         int err = 0;
12943         u64 num;
12944         int init_csum_tree = 0;
12945         int readonly = 0;
12946         int clear_space_cache = 0;
12947         int qgroup_report = 0;
12948         int qgroups_repaired = 0;
12949         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12950         int force = 0;
12951
12952         while(1) {
12953                 int c;
12954                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12955                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12956                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12957                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
12958                         GETOPT_VAL_FORCE };
12959                 static const struct option long_options[] = {
12960                         { "super", required_argument, NULL, 's' },
12961                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12962                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12963                         { "init-csum-tree", no_argument, NULL,
12964                                 GETOPT_VAL_INIT_CSUM },
12965                         { "init-extent-tree", no_argument, NULL,
12966                                 GETOPT_VAL_INIT_EXTENT },
12967                         { "check-data-csum", no_argument, NULL,
12968                                 GETOPT_VAL_CHECK_CSUM },
12969                         { "backup", no_argument, NULL, 'b' },
12970                         { "subvol-extents", required_argument, NULL, 'E' },
12971                         { "qgroup-report", no_argument, NULL, 'Q' },
12972                         { "tree-root", required_argument, NULL, 'r' },
12973                         { "chunk-root", required_argument, NULL,
12974                                 GETOPT_VAL_CHUNK_TREE },
12975                         { "progress", no_argument, NULL, 'p' },
12976                         { "mode", required_argument, NULL,
12977                                 GETOPT_VAL_MODE },
12978                         { "clear-space-cache", required_argument, NULL,
12979                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12980                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
12981                         { NULL, 0, NULL, 0}
12982                 };
12983
12984                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12985                 if (c < 0)
12986                         break;
12987                 switch(c) {
12988                         case 'a': /* ignored */ break;
12989                         case 'b':
12990                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12991                                 break;
12992                         case 's':
12993                                 num = arg_strtou64(optarg);
12994                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12995                                         error(
12996                                         "super mirror should be less than %d",
12997                                                 BTRFS_SUPER_MIRROR_MAX);
12998                                         exit(1);
12999                                 }
13000                                 bytenr = btrfs_sb_offset(((int)num));
13001                                 printf("using SB copy %llu, bytenr %llu\n", num,
13002                                        (unsigned long long)bytenr);
13003                                 break;
13004                         case 'Q':
13005                                 qgroup_report = 1;
13006                                 break;
13007                         case 'E':
13008                                 subvolid = arg_strtou64(optarg);
13009                                 break;
13010                         case 'r':
13011                                 tree_root_bytenr = arg_strtou64(optarg);
13012                                 break;
13013                         case GETOPT_VAL_CHUNK_TREE:
13014                                 chunk_root_bytenr = arg_strtou64(optarg);
13015                                 break;
13016                         case 'p':
13017                                 ctx.progress_enabled = true;
13018                                 break;
13019                         case '?':
13020                         case 'h':
13021                                 usage(cmd_check_usage);
13022                         case GETOPT_VAL_REPAIR:
13023                                 printf("enabling repair mode\n");
13024                                 repair = 1;
13025                                 ctree_flags |= OPEN_CTREE_WRITES;
13026                                 break;
13027                         case GETOPT_VAL_READONLY:
13028                                 readonly = 1;
13029                                 break;
13030                         case GETOPT_VAL_INIT_CSUM:
13031                                 printf("Creating a new CRC tree\n");
13032                                 init_csum_tree = 1;
13033                                 repair = 1;
13034                                 ctree_flags |= OPEN_CTREE_WRITES;
13035                                 break;
13036                         case GETOPT_VAL_INIT_EXTENT:
13037                                 init_extent_tree = 1;
13038                                 ctree_flags |= (OPEN_CTREE_WRITES |
13039                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
13040                                 repair = 1;
13041                                 break;
13042                         case GETOPT_VAL_CHECK_CSUM:
13043                                 check_data_csum = 1;
13044                                 break;
13045                         case GETOPT_VAL_MODE:
13046                                 check_mode = parse_check_mode(optarg);
13047                                 if (check_mode == CHECK_MODE_UNKNOWN) {
13048                                         error("unknown mode: %s", optarg);
13049                                         exit(1);
13050                                 }
13051                                 break;
13052                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
13053                                 if (strcmp(optarg, "v1") == 0) {
13054                                         clear_space_cache = 1;
13055                                 } else if (strcmp(optarg, "v2") == 0) {
13056                                         clear_space_cache = 2;
13057                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13058                                 } else {
13059                                         error(
13060                 "invalid argument to --clear-space-cache, must be v1 or v2");
13061                                         exit(1);
13062                                 }
13063                                 ctree_flags |= OPEN_CTREE_WRITES;
13064                                 break;
13065                         case GETOPT_VAL_FORCE:
13066                                 force = 1;
13067                                 break;
13068                 }
13069         }
13070
13071         if (check_argc_exact(argc - optind, 1))
13072                 usage(cmd_check_usage);
13073
13074         if (ctx.progress_enabled) {
13075                 ctx.tp = TASK_NOTHING;
13076                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13077         }
13078
13079         /* This check is the only reason for --readonly to exist */
13080         if (readonly && repair) {
13081                 error("repair options are not compatible with --readonly");
13082                 exit(1);
13083         }
13084
13085         /*
13086          * Not supported yet
13087          */
13088         if (repair && check_mode == CHECK_MODE_LOWMEM) {
13089                 error("low memory mode doesn't support repair yet");
13090                 exit(1);
13091         }
13092
13093         radix_tree_init();
13094         cache_tree_init(&root_cache);
13095
13096         ret = check_mounted(argv[optind]);
13097         if (!force) {
13098                 if (ret < 0) {
13099                         error("could not check mount status: %s",
13100                                         strerror(-ret));
13101                         err |= !!ret;
13102                         goto err_out;
13103                 } else if (ret) {
13104                         error(
13105 "%s is currently mounted, use --force if you really intend to check the filesystem",
13106                                 argv[optind]);
13107                         ret = -EBUSY;
13108                         err |= !!ret;
13109                         goto err_out;
13110                 }
13111         } else {
13112                 if (repair) {
13113                         error("repair and --force is not yet supported");
13114                         ret = 1;
13115                         err |= !!ret;
13116                         goto err_out;
13117                 }
13118                 if (ret < 0) {
13119                         warning(
13120 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13121                                 argv[optind]);
13122                 } else if (ret) {
13123                         warning(
13124                         "filesystem mounted, continuing because of --force");
13125                 }
13126         }
13127
13128         /* only allow partial opening under repair mode */
13129         if (repair)
13130                 ctree_flags |= OPEN_CTREE_PARTIAL;
13131
13132         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13133                                   chunk_root_bytenr, ctree_flags);
13134         if (!info) {
13135                 error("cannot open file system");
13136                 ret = -EIO;
13137                 err |= !!ret;
13138                 goto err_out;
13139         }
13140
13141         global_info = info;
13142         root = info->fs_root;
13143         uuid_unparse(info->super_copy->fsid, uuidbuf);
13144
13145         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13146
13147         /*
13148          * Check the bare minimum before starting anything else that could rely
13149          * on it, namely the tree roots, any local consistency checks
13150          */
13151         if (!extent_buffer_uptodate(info->tree_root->node) ||
13152             !extent_buffer_uptodate(info->dev_root->node) ||
13153             !extent_buffer_uptodate(info->chunk_root->node)) {
13154                 error("critical roots corrupted, unable to check the filesystem");
13155                 err |= !!ret;
13156                 ret = -EIO;
13157                 goto close_out;
13158         }
13159
13160         if (clear_space_cache) {
13161                 ret = do_clear_free_space_cache(info, clear_space_cache);
13162                 err |= !!ret;
13163                 goto close_out;
13164         }
13165
13166         /*
13167          * repair mode will force us to commit transaction which
13168          * will make us fail to load log tree when mounting.
13169          */
13170         if (repair && btrfs_super_log_root(info->super_copy)) {
13171                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13172                 if (!ret) {
13173                         ret = 1;
13174                         err |= !!ret;
13175                         goto close_out;
13176                 }
13177                 ret = zero_log_tree(root);
13178                 err |= !!ret;
13179                 if (ret) {
13180                         error("failed to zero log tree: %d", ret);
13181                         goto close_out;
13182                 }
13183         }
13184
13185         if (qgroup_report) {
13186                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13187                        uuidbuf);
13188                 ret = qgroup_verify_all(info);
13189                 err |= !!ret;
13190                 if (ret == 0)
13191                         report_qgroups(1);
13192                 goto close_out;
13193         }
13194         if (subvolid) {
13195                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13196                        subvolid, argv[optind], uuidbuf);
13197                 ret = print_extent_state(info, subvolid);
13198                 err |= !!ret;
13199                 goto close_out;
13200         }
13201
13202         if (init_extent_tree || init_csum_tree) {
13203                 struct btrfs_trans_handle *trans;
13204
13205                 trans = btrfs_start_transaction(info->extent_root, 0);
13206                 if (IS_ERR(trans)) {
13207                         error("error starting transaction");
13208                         ret = PTR_ERR(trans);
13209                         err |= !!ret;
13210                         goto close_out;
13211                 }
13212
13213                 if (init_extent_tree) {
13214                         printf("Creating a new extent tree\n");
13215                         ret = reinit_extent_tree(trans, info);
13216                         err |= !!ret;
13217                         if (ret)
13218                                 goto close_out;
13219                 }
13220
13221                 if (init_csum_tree) {
13222                         printf("Reinitialize checksum tree\n");
13223                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13224                         if (ret) {
13225                                 error("checksum tree initialization failed: %d",
13226                                                 ret);
13227                                 ret = -EIO;
13228                                 err |= !!ret;
13229                                 goto close_out;
13230                         }
13231
13232                         ret = fill_csum_tree(trans, info->csum_root,
13233                                              init_extent_tree);
13234                         err |= !!ret;
13235                         if (ret) {
13236                                 error("checksum tree refilling failed: %d", ret);
13237                                 return -EIO;
13238                         }
13239                 }
13240                 /*
13241                  * Ok now we commit and run the normal fsck, which will add
13242                  * extent entries for all of the items it finds.
13243                  */
13244                 ret = btrfs_commit_transaction(trans, info->extent_root);
13245                 err |= !!ret;
13246                 if (ret)
13247                         goto close_out;
13248         }
13249         if (!extent_buffer_uptodate(info->extent_root->node)) {
13250                 error("critical: extent_root, unable to check the filesystem");
13251                 ret = -EIO;
13252                 err |= !!ret;
13253                 goto close_out;
13254         }
13255         if (!extent_buffer_uptodate(info->csum_root->node)) {
13256                 error("critical: csum_root, unable to check the filesystem");
13257                 ret = -EIO;
13258                 err |= !!ret;
13259                 goto close_out;
13260         }
13261
13262         ret = do_check_chunks_and_extents(info);
13263         err |= !!ret;
13264         if (ret)
13265                 error(
13266                 "errors found in extent allocation tree or chunk allocation");
13267
13268         ret = repair_root_items(info);
13269         err |= !!ret;
13270         if (ret < 0) {
13271                 error("failed to repair root items: %s", strerror(-ret));
13272                 goto close_out;
13273         }
13274         if (repair) {
13275                 fprintf(stderr, "Fixed %d roots.\n", ret);
13276                 ret = 0;
13277         } else if (ret > 0) {
13278                 fprintf(stderr,
13279                        "Found %d roots with an outdated root item.\n",
13280                        ret);
13281                 fprintf(stderr,
13282                         "Please run a filesystem check with the option --repair to fix them.\n");
13283                 ret = 1;
13284                 err |= !!ret;
13285                 goto close_out;
13286         }
13287
13288         if (!ctx.progress_enabled) {
13289                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13290                         fprintf(stderr, "checking free space tree\n");
13291                 else
13292                         fprintf(stderr, "checking free space cache\n");
13293         }
13294         ret = check_space_cache(root);
13295         err |= !!ret;
13296         if (ret) {
13297                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13298                         error("errors found in free space tree");
13299                 else
13300                         error("errors found in free space cache");
13301                 goto out;
13302         }
13303
13304         /*
13305          * We used to have to have these hole extents in between our real
13306          * extents so if we don't have this flag set we need to make sure there
13307          * are no gaps in the file extents for inodes, otherwise we can just
13308          * ignore it when this happens.
13309          */
13310         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13311         ret = do_check_fs_roots(info, &root_cache);
13312         err |= !!ret;
13313         if (ret) {
13314                 error("errors found in fs roots");
13315                 goto out;
13316         }
13317
13318         fprintf(stderr, "checking csums\n");
13319         ret = check_csums(root);
13320         err |= !!ret;
13321         if (ret) {
13322                 error("errors found in csum tree");
13323                 goto out;
13324         }
13325
13326         fprintf(stderr, "checking root refs\n");
13327         /* For low memory mode, check_fs_roots_v2 handles root refs */
13328         if (check_mode != CHECK_MODE_LOWMEM) {
13329                 ret = check_root_refs(root, &root_cache);
13330                 err |= !!ret;
13331                 if (ret) {
13332                         error("errors found in root refs");
13333                         goto out;
13334                 }
13335         }
13336
13337         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13338                 struct extent_buffer *eb;
13339
13340                 eb = list_first_entry(&root->fs_info->recow_ebs,
13341                                       struct extent_buffer, recow);
13342                 list_del_init(&eb->recow);
13343                 ret = recow_extent_buffer(root, eb);
13344                 err |= !!ret;
13345                 if (ret) {
13346                         error("fails to fix transid errors");
13347                         break;
13348                 }
13349         }
13350
13351         while (!list_empty(&delete_items)) {
13352                 struct bad_item *bad;
13353
13354                 bad = list_first_entry(&delete_items, struct bad_item, list);
13355                 list_del_init(&bad->list);
13356                 if (repair) {
13357                         ret = delete_bad_item(root, bad);
13358                         err |= !!ret;
13359                 }
13360                 free(bad);
13361         }
13362
13363         if (info->quota_enabled) {
13364                 fprintf(stderr, "checking quota groups\n");
13365                 ret = qgroup_verify_all(info);
13366                 err |= !!ret;
13367                 if (ret) {
13368                         error("failed to check quota groups");
13369                         goto out;
13370                 }
13371                 report_qgroups(0);
13372                 ret = repair_qgroups(info, &qgroups_repaired);
13373                 err |= !!ret;
13374                 if (err) {
13375                         error("failed to repair quota groups");
13376                         goto out;
13377                 }
13378                 ret = 0;
13379         }
13380
13381         if (!list_empty(&root->fs_info->recow_ebs)) {
13382                 error("transid errors in file system");
13383                 ret = 1;
13384                 err |= !!ret;
13385         }
13386 out:
13387         printf("found %llu bytes used, ",
13388                (unsigned long long)bytes_used);
13389         if (err)
13390                 printf("error(s) found\n");
13391         else
13392                 printf("no error found\n");
13393         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13394         printf("total tree bytes: %llu\n",
13395                (unsigned long long)total_btree_bytes);
13396         printf("total fs tree bytes: %llu\n",
13397                (unsigned long long)total_fs_tree_bytes);
13398         printf("total extent tree bytes: %llu\n",
13399                (unsigned long long)total_extent_tree_bytes);
13400         printf("btree space waste bytes: %llu\n",
13401                (unsigned long long)btree_space_waste);
13402         printf("file data blocks allocated: %llu\n referenced %llu\n",
13403                 (unsigned long long)data_bytes_allocated,
13404                 (unsigned long long)data_bytes_referenced);
13405
13406         free_qgroup_counts();
13407         free_root_recs_tree(&root_cache);
13408 close_out:
13409         close_ctree(root);
13410 err_out:
13411         if (ctx.progress_enabled)
13412                 task_deinit(ctx.info);
13413
13414         return err;
13415 }