btrfs-progs: check: repair inode nbytes in lowmem mode
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases the progress indicator can report.  The values index the phase
 * name table in print_status_check(), so they are spelled out explicitly.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE = 1,
	TASK_FS_ROOTS = 2,
	/* Sentinel, has to be the last element. */
	TASK_NOTHING = 3,
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/* Check implementations selectable by name, see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM = 1,
	/* Returned by parse_check_mode() for an unrecognized name. */
	CHECK_MODE_UNKNOWN = 2,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
87 struct extent_backref {
88         struct rb_node node;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
/* Error bits for fs tree items; bit 0 is not used by this set. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
133
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
135 {
136         return container_of(back, struct data_backref, node);
137 }
138
139 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
140 {
141         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
142         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
143         struct data_backref *back1 = to_data_backref(ext1);
144         struct data_backref *back2 = to_data_backref(ext2);
145
146         WARN_ON(!ext1->is_data);
147         WARN_ON(!ext2->is_data);
148
149         /* parent and root are a union, so this covers both */
150         if (back1->parent > back2->parent)
151                 return 1;
152         if (back1->parent < back2->parent)
153                 return -1;
154
155         /* This is a full backref and the parents match. */
156         if (back1->node.full_backref)
157                 return 0;
158
159         if (back1->owner > back2->owner)
160                 return 1;
161         if (back1->owner < back2->owner)
162                 return -1;
163
164         if (back1->offset > back2->offset)
165                 return 1;
166         if (back1->offset < back2->offset)
167                 return -1;
168
169         if (back1->found_ref && back2->found_ref) {
170                 if (back1->disk_bytenr > back2->disk_bytenr)
171                         return 1;
172                 if (back1->disk_bytenr < back2->disk_bytenr)
173                         return -1;
174
175                 if (back1->bytes > back2->bytes)
176                         return 1;
177                 if (back1->bytes < back2->bytes)
178                         return -1;
179         }
180
181         return 0;
182 }
183
184 /*
185  * Much like data_backref, just removed the undetermined members
186  * and change it to use list_head.
187  * During extent scan, it is stored in root->orphan_data_extent.
188  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
189  */
190 struct orphan_data_extent {
191         struct list_head list;
192         u64 root;
193         u64 objectid;
194         u64 offset;
195         u64 disk_bytenr;
196         u64 disk_len;
197 };
198
199 struct tree_backref {
200         struct extent_backref node;
201         union {
202                 u64 parent;
203                 u64 root;
204         };
205 };
206
207 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
208 {
209         return container_of(back, struct tree_backref, node);
210 }
211
212 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
213 {
214         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
215         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
216         struct tree_backref *back1 = to_tree_backref(ext1);
217         struct tree_backref *back2 = to_tree_backref(ext2);
218
219         WARN_ON(ext1->is_data);
220         WARN_ON(ext2->is_data);
221
222         /* parent and root are a union, so this covers both */
223         if (back1->parent > back2->parent)
224                 return 1;
225         if (back1->parent < back2->parent)
226                 return -1;
227
228         return 0;
229 }
230
231 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
232 {
233         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
234         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
235
236         if (ext1->is_data > ext2->is_data)
237                 return 1;
238
239         if (ext1->is_data < ext2->is_data)
240                 return -1;
241
242         if (ext1->full_backref > ext2->full_backref)
243                 return 1;
244         if (ext1->full_backref < ext2->full_backref)
245                 return -1;
246
247         if (ext1->is_data)
248                 return compare_data_backref(node1, node2);
249         else
250                 return compare_tree_backref(node1, node2);
251 }
252
/* Explicit initialization for extent_record::flag_block_full_backref */
enum {
	FLAG_UNSET = 2
};
255
256 struct extent_record {
257         struct list_head backrefs;
258         struct list_head dups;
259         struct rb_root backref_tree;
260         struct list_head list;
261         struct cache_extent cache;
262         struct btrfs_disk_key parent_key;
263         u64 start;
264         u64 max_size;
265         u64 nr;
266         u64 refs;
267         u64 extent_item_refs;
268         u64 generation;
269         u64 parent_generation;
270         u64 info_objectid;
271         u32 num_duplicates;
272         u8 info_level;
273         unsigned int flag_block_full_backref:2;
274         unsigned int found_rec:1;
275         unsigned int content_checked:1;
276         unsigned int owner_ref_checked:1;
277         unsigned int is_root:1;
278         unsigned int metadata:1;
279         unsigned int bad_full_backref:1;
280         unsigned int crossing_stripes:1;
281         unsigned int wrong_chunk_type:1;
282 };
283
284 static inline struct extent_record* to_extent_record(struct list_head *entry)
285 {
286         return container_of(entry, struct extent_record, list);
287 }
288
289 struct inode_backref {
290         struct list_head list;
291         unsigned int found_dir_item:1;
292         unsigned int found_dir_index:1;
293         unsigned int found_inode_ref:1;
294         u8 filetype;
295         u8 ref_type;
296         int errors;
297         u64 dir;
298         u64 index;
299         u16 namelen;
300         char name[0];
301 };
302
303 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
304 {
305         return list_entry(entry, struct inode_backref, list);
306 }
307
308 struct root_item_record {
309         struct list_head list;
310         u64 objectid;
311         u64 bytenr;
312         u64 last_snapshot;
313         u8 level;
314         u8 drop_level;
315         struct btrfs_key drop_key;
316 };
317
/* Error bits kept in inode_backref::errors and root_backref::errors. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
331
332 struct file_extent_hole {
333         struct rb_node node;
334         u64 start;
335         u64 len;
336 };
337
338 struct inode_record {
339         struct list_head backrefs;
340         unsigned int checked:1;
341         unsigned int merging:1;
342         unsigned int found_inode_item:1;
343         unsigned int found_dir_item:1;
344         unsigned int found_file_extent:1;
345         unsigned int found_csum_item:1;
346         unsigned int some_csum_missing:1;
347         unsigned int nodatasum:1;
348         int errors;
349
350         u64 ino;
351         u32 nlink;
352         u32 imode;
353         u64 isize;
354         u64 nbytes;
355
356         u32 found_link;
357         u64 found_size;
358         u64 extent_start;
359         u64 extent_end;
360         struct rb_root holes;
361         struct list_head orphan_extents;
362
363         u32 refs;
364 };
365
/* Error bits kept in inode_record::errors. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
381
382 struct root_backref {
383         struct list_head list;
384         unsigned int found_dir_item:1;
385         unsigned int found_dir_index:1;
386         unsigned int found_back_ref:1;
387         unsigned int found_forward_ref:1;
388         unsigned int reachable:1;
389         int errors;
390         u64 ref_root;
391         u64 dir;
392         u64 index;
393         u16 namelen;
394         char name[0];
395 };
396
397 static inline struct root_backref* to_root_backref(struct list_head *entry)
398 {
399         return list_entry(entry, struct root_backref, list);
400 }
401
402 struct root_record {
403         struct list_head backrefs;
404         struct cache_extent cache;
405         unsigned int found_root_item:1;
406         u64 objectid;
407         u32 found_ref;
408 };
409
410 struct ptr_node {
411         struct cache_extent cache;
412         void *data;
413 };
414
415 struct shared_node {
416         struct cache_extent cache;
417         struct cache_tree root_cache;
418         struct cache_tree inode_cache;
419         struct inode_record *current;
420         u32 refs;
421 };
422
423 struct block_info {
424         u64 start;
425         u32 size;
426 };
427
428 struct walk_control {
429         struct cache_tree shared;
430         struct shared_node *nodes[BTRFS_MAX_LEVEL];
431         int active_node;
432         int root_level;
433 };
434
435 struct bad_item {
436         struct btrfs_key key;
437         u64 root_id;
438         struct list_head list;
439 };
440
441 struct extent_entry {
442         u64 bytenr;
443         u64 bytes;
444         int count;
445         int broken;
446         struct list_head list;
447 };
448
449 struct root_item_info {
450         /* level of the root */
451         u8 level;
452         /* number of nodes at this level, must be 1 for a root */
453         int node_count;
454         u64 bytenr;
455         u64 gen;
456         struct cache_extent cache_extent;
457 };
458
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every condition owns a distinct bit.  CROSSING_STRIPE_BOUNDARY used to
 * share bit 4 with REFERENCER_MISMATCH, which made the two errors
 * indistinguishable once OR-ed into an error mask; it now uses the first
 * free bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
475
476 static void *print_status_check(void *p)
477 {
478         struct task_ctx *priv = p;
479         const char work_indicator[] = { '.', 'o', 'O', 'o' };
480         uint32_t count = 0;
481         static char *task_position_string[] = {
482                 "checking extents",
483                 "checking free space cache",
484                 "checking fs roots",
485         };
486
487         task_period_start(priv->info, 1000 /* 1s */);
488
489         if (priv->tp == TASK_NOTHING)
490                 return NULL;
491
492         while (1) {
493                 printf("%s [%c]\r", task_position_string[priv->tp],
494                                 work_indicator[count % 4]);
495                 count++;
496                 fflush(stdout);
497                 task_period_wait(priv->info);
498         }
499         return NULL;
500 }
501
/*
 * Progress teardown callback: ends the status line with a newline and
 * flushes stdout.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
509
510 static enum btrfs_check_mode parse_check_mode(const char *str)
511 {
512         if (strcmp(str, "lowmem") == 0)
513                 return CHECK_MODE_LOWMEM;
514         if (strcmp(str, "orig") == 0)
515                 return CHECK_MODE_ORIGINAL;
516         if (strcmp(str, "original") == 0)
517                 return CHECK_MODE_ORIGINAL;
518
519         return CHECK_MODE_UNKNOWN;
520 }
521
522 /* Compatible function to allow reuse of old codes */
523 static u64 first_extent_gap(struct rb_root *holes)
524 {
525         struct file_extent_hole *hole;
526
527         if (RB_EMPTY_ROOT(holes))
528                 return (u64)-1;
529
530         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
531         return hole->start;
532 }
533
534 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
535 {
536         struct file_extent_hole *hole1;
537         struct file_extent_hole *hole2;
538
539         hole1 = rb_entry(node1, struct file_extent_hole, node);
540         hole2 = rb_entry(node2, struct file_extent_hole, node);
541
542         if (hole1->start > hole2->start)
543                 return -1;
544         if (hole1->start < hole2->start)
545                 return 1;
546         /* Now hole1->start == hole2->start */
547         if (hole1->len >= hole2->len)
548                 /*
549                  * Hole 1 will be merge center
550                  * Same hole will be merged later
551                  */
552                 return -1;
553         /* Hole 2 will be merge center */
554         return 1;
555 }
556
557 /*
558  * Add a hole to the record
559  *
560  * This will do hole merge for copy_file_extent_holes(),
561  * which will ensure there won't be continuous holes.
562  */
563 static int add_file_extent_hole(struct rb_root *holes,
564                                 u64 start, u64 len)
565 {
566         struct file_extent_hole *hole;
567         struct file_extent_hole *prev = NULL;
568         struct file_extent_hole *next = NULL;
569
570         hole = malloc(sizeof(*hole));
571         if (!hole)
572                 return -ENOMEM;
573         hole->start = start;
574         hole->len = len;
575         /* Since compare will not return 0, no -EEXIST will happen */
576         rb_insert(holes, &hole->node, compare_hole);
577
578         /* simple merge with previous hole */
579         if (rb_prev(&hole->node))
580                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
581                                 node);
582         if (prev && prev->start + prev->len >= hole->start) {
583                 hole->len = hole->start + hole->len - prev->start;
584                 hole->start = prev->start;
585                 rb_erase(&prev->node, holes);
586                 free(prev);
587                 prev = NULL;
588         }
589
590         /* iterate merge with next holes */
591         while (1) {
592                 if (!rb_next(&hole->node))
593                         break;
594                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
595                                         node);
596                 if (hole->start + hole->len >= next->start) {
597                         if (hole->start + hole->len <= next->start + next->len)
598                                 hole->len = next->start + next->len -
599                                             hole->start;
600                         rb_erase(&next->node, holes);
601                         free(next);
602                         next = NULL;
603                 } else
604                         break;
605         }
606         return 0;
607 }
608
609 static int compare_hole_range(struct rb_node *node, void *data)
610 {
611         struct file_extent_hole *hole;
612         u64 start;
613
614         hole = (struct file_extent_hole *)data;
615         start = hole->start;
616
617         hole = rb_entry(node, struct file_extent_hole, node);
618         if (start < hole->start)
619                 return -1;
620         if (start >= hole->start && start < hole->start + hole->len)
621                 return 0;
622         return 1;
623 }
624
625 /*
626  * Delete a hole in the record
627  *
628  * This will do the hole split and is much restrict than add.
629  */
630 static int del_file_extent_hole(struct rb_root *holes,
631                                 u64 start, u64 len)
632 {
633         struct file_extent_hole *hole;
634         struct file_extent_hole tmp;
635         u64 prev_start = 0;
636         u64 prev_len = 0;
637         u64 next_start = 0;
638         u64 next_len = 0;
639         struct rb_node *node;
640         int have_prev = 0;
641         int have_next = 0;
642         int ret = 0;
643
644         tmp.start = start;
645         tmp.len = len;
646         node = rb_search(holes, &tmp, compare_hole_range, NULL);
647         if (!node)
648                 return -EEXIST;
649         hole = rb_entry(node, struct file_extent_hole, node);
650         if (start + len > hole->start + hole->len)
651                 return -EEXIST;
652
653         /*
654          * Now there will be no overlap, delete the hole and re-add the
655          * split(s) if they exists.
656          */
657         if (start > hole->start) {
658                 prev_start = hole->start;
659                 prev_len = start - hole->start;
660                 have_prev = 1;
661         }
662         if (hole->start + hole->len > start + len) {
663                 next_start = start + len;
664                 next_len = hole->start + hole->len - start - len;
665                 have_next = 1;
666         }
667         rb_erase(node, holes);
668         free(hole);
669         if (have_prev) {
670                 ret = add_file_extent_hole(holes, prev_start, prev_len);
671                 if (ret < 0)
672                         return ret;
673         }
674         if (have_next) {
675                 ret = add_file_extent_hole(holes, next_start, next_len);
676                 if (ret < 0)
677                         return ret;
678         }
679         return 0;
680 }
681
682 static int copy_file_extent_holes(struct rb_root *dst,
683                                   struct rb_root *src)
684 {
685         struct file_extent_hole *hole;
686         struct rb_node *node;
687         int ret = 0;
688
689         node = rb_first(src);
690         while (node) {
691                 hole = rb_entry(node, struct file_extent_hole, node);
692                 ret = add_file_extent_hole(dst, hole->start, hole->len);
693                 if (ret)
694                         break;
695                 node = rb_next(node);
696         }
697         return ret;
698 }
699
700 static void free_file_extent_holes(struct rb_root *holes)
701 {
702         struct rb_node *node;
703         struct file_extent_hole *hole;
704
705         node = rb_first(holes);
706         while (node) {
707                 hole = rb_entry(node, struct file_extent_hole, node);
708                 rb_erase(node, holes);
709                 free(hole);
710                 node = rb_first(holes);
711         }
712 }
713
714 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
715
716 static void record_root_in_trans(struct btrfs_trans_handle *trans,
717                                  struct btrfs_root *root)
718 {
719         if (root->last_trans != trans->transid) {
720                 root->track_dirty = 1;
721                 root->last_trans = trans->transid;
722                 root->commit_root = root->node;
723                 extent_buffer_get(root->node);
724         }
725 }
726
727 static u8 imode_to_type(u32 imode)
728 {
729 #define S_SHIFT 12
730         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
731                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
732                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
733                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
734                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
735                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
736                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
737                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
738         };
739
740         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
741 #undef S_SHIFT
742 }
743
744 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
745 {
746         struct device_record *rec1;
747         struct device_record *rec2;
748
749         rec1 = rb_entry(node1, struct device_record, node);
750         rec2 = rb_entry(node2, struct device_record, node);
751         if (rec1->devid > rec2->devid)
752                 return -1;
753         else if (rec1->devid < rec2->devid)
754                 return 1;
755         else
756                 return 0;
757 }
758
759 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
760 {
761         struct inode_record *rec;
762         struct inode_backref *backref;
763         struct inode_backref *orig;
764         struct inode_backref *tmp;
765         struct orphan_data_extent *src_orphan;
766         struct orphan_data_extent *dst_orphan;
767         struct rb_node *rb;
768         size_t size;
769         int ret;
770
771         rec = malloc(sizeof(*rec));
772         if (!rec)
773                 return ERR_PTR(-ENOMEM);
774         memcpy(rec, orig_rec, sizeof(*rec));
775         rec->refs = 1;
776         INIT_LIST_HEAD(&rec->backrefs);
777         INIT_LIST_HEAD(&rec->orphan_extents);
778         rec->holes = RB_ROOT;
779
780         list_for_each_entry(orig, &orig_rec->backrefs, list) {
781                 size = sizeof(*orig) + orig->namelen + 1;
782                 backref = malloc(size);
783                 if (!backref) {
784                         ret = -ENOMEM;
785                         goto cleanup;
786                 }
787                 memcpy(backref, orig, size);
788                 list_add_tail(&backref->list, &rec->backrefs);
789         }
790         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
791                 dst_orphan = malloc(sizeof(*dst_orphan));
792                 if (!dst_orphan) {
793                         ret = -ENOMEM;
794                         goto cleanup;
795                 }
796                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
797                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
798         }
799         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
800         if (ret < 0)
801                 goto cleanup_rb;
802
803         return rec;
804
805 cleanup_rb:
806         rb = rb_first(&rec->holes);
807         while (rb) {
808                 struct file_extent_hole *hole;
809
810                 hole = rb_entry(rb, struct file_extent_hole, node);
811                 rb = rb_next(rb);
812                 free(hole);
813         }
814
815 cleanup:
816         if (!list_empty(&rec->backrefs))
817                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
818                         list_del(&orig->list);
819                         free(orig);
820                 }
821
822         if (!list_empty(&rec->orphan_extents))
823                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
824                         list_del(&orig->list);
825                         free(orig);
826                 }
827
828         free(rec);
829
830         return ERR_PTR(ret);
831 }
832
833 static void print_orphan_data_extents(struct list_head *orphan_extents,
834                                       u64 objectid)
835 {
836         struct orphan_data_extent *orphan;
837
838         if (list_empty(orphan_extents))
839                 return;
840         printf("The following data extent is lost in tree %llu:\n",
841                objectid);
842         list_for_each_entry(orphan, orphan_extents, list) {
843                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
844                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
845                        orphan->disk_len);
846         }
847 }
848
849 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
850 {
851         u64 root_objectid = root->root_key.objectid;
852         int errors = rec->errors;
853
854         if (!errors)
855                 return;
856         /* reloc root errors, we print its corresponding fs root objectid*/
857         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
858                 root_objectid = root->root_key.offset;
859                 fprintf(stderr, "reloc");
860         }
861         fprintf(stderr, "root %llu inode %llu errors %x",
862                 (unsigned long long) root_objectid,
863                 (unsigned long long) rec->ino, rec->errors);
864
865         if (errors & I_ERR_NO_INODE_ITEM)
866                 fprintf(stderr, ", no inode item");
867         if (errors & I_ERR_NO_ORPHAN_ITEM)
868                 fprintf(stderr, ", no orphan item");
869         if (errors & I_ERR_DUP_INODE_ITEM)
870                 fprintf(stderr, ", dup inode item");
871         if (errors & I_ERR_DUP_DIR_INDEX)
872                 fprintf(stderr, ", dup dir index");
873         if (errors & I_ERR_ODD_DIR_ITEM)
874                 fprintf(stderr, ", odd dir item");
875         if (errors & I_ERR_ODD_FILE_EXTENT)
876                 fprintf(stderr, ", odd file extent");
877         if (errors & I_ERR_BAD_FILE_EXTENT)
878                 fprintf(stderr, ", bad file extent");
879         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
880                 fprintf(stderr, ", file extent overlap");
881         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
882                 fprintf(stderr, ", file extent discount");
883         if (errors & I_ERR_DIR_ISIZE_WRONG)
884                 fprintf(stderr, ", dir isize wrong");
885         if (errors & I_ERR_FILE_NBYTES_WRONG)
886                 fprintf(stderr, ", nbytes wrong");
887         if (errors & I_ERR_ODD_CSUM_ITEM)
888                 fprintf(stderr, ", odd csum item");
889         if (errors & I_ERR_SOME_CSUM_MISSING)
890                 fprintf(stderr, ", some csum missing");
891         if (errors & I_ERR_LINK_COUNT_WRONG)
892                 fprintf(stderr, ", link count wrong");
893         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
894                 fprintf(stderr, ", orphan file extent");
895         fprintf(stderr, "\n");
896         /* Print the orphan extents if needed */
897         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
899
900         /* Print the holes if needed */
901         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
902                 struct file_extent_hole *hole;
903                 struct rb_node *node;
904                 int found = 0;
905
906                 node = rb_first(&rec->holes);
907                 fprintf(stderr, "Found file extent holes:\n");
908                 while (node) {
909                         found = 1;
910                         hole = rb_entry(node, struct file_extent_hole, node);
911                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
912                                 hole->start, hole->len);
913                         node = rb_next(node);
914                 }
915                 if (!found)
916                         fprintf(stderr, "\tstart: 0, len: %llu\n",
917                                 round_up(rec->isize,
918                                          root->fs_info->sectorsize));
919         }
920 }
921
922 static void print_ref_error(int errors)
923 {
924         if (errors & REF_ERR_NO_DIR_ITEM)
925                 fprintf(stderr, ", no dir item");
926         if (errors & REF_ERR_NO_DIR_INDEX)
927                 fprintf(stderr, ", no dir index");
928         if (errors & REF_ERR_NO_INODE_REF)
929                 fprintf(stderr, ", no inode ref");
930         if (errors & REF_ERR_DUP_DIR_ITEM)
931                 fprintf(stderr, ", dup dir item");
932         if (errors & REF_ERR_DUP_DIR_INDEX)
933                 fprintf(stderr, ", dup dir index");
934         if (errors & REF_ERR_DUP_INODE_REF)
935                 fprintf(stderr, ", dup inode ref");
936         if (errors & REF_ERR_INDEX_UNMATCH)
937                 fprintf(stderr, ", index mismatch");
938         if (errors & REF_ERR_FILETYPE_UNMATCH)
939                 fprintf(stderr, ", filetype mismatch");
940         if (errors & REF_ERR_NAME_TOO_LONG)
941                 fprintf(stderr, ", name too long");
942         if (errors & REF_ERR_NO_ROOT_REF)
943                 fprintf(stderr, ", no root ref");
944         if (errors & REF_ERR_NO_ROOT_BACKREF)
945                 fprintf(stderr, ", no root backref");
946         if (errors & REF_ERR_DUP_ROOT_REF)
947                 fprintf(stderr, ", dup root ref");
948         if (errors & REF_ERR_DUP_ROOT_BACKREF)
949                 fprintf(stderr, ", dup root backref");
950         fprintf(stderr, "\n");
951 }
952
953 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
954                                           u64 ino, int mod)
955 {
956         struct ptr_node *node;
957         struct cache_extent *cache;
958         struct inode_record *rec = NULL;
959         int ret;
960
961         cache = lookup_cache_extent(inode_cache, ino, 1);
962         if (cache) {
963                 node = container_of(cache, struct ptr_node, cache);
964                 rec = node->data;
965                 if (mod && rec->refs > 1) {
966                         node->data = clone_inode_rec(rec);
967                         if (IS_ERR(node->data))
968                                 return node->data;
969                         rec->refs--;
970                         rec = node->data;
971                 }
972         } else if (mod) {
973                 rec = calloc(1, sizeof(*rec));
974                 if (!rec)
975                         return ERR_PTR(-ENOMEM);
976                 rec->ino = ino;
977                 rec->extent_start = (u64)-1;
978                 rec->refs = 1;
979                 INIT_LIST_HEAD(&rec->backrefs);
980                 INIT_LIST_HEAD(&rec->orphan_extents);
981                 rec->holes = RB_ROOT;
982
983                 node = malloc(sizeof(*node));
984                 if (!node) {
985                         free(rec);
986                         return ERR_PTR(-ENOMEM);
987                 }
988                 node->cache.start = ino;
989                 node->cache.size = 1;
990                 node->data = rec;
991
992                 if (ino == BTRFS_FREE_INO_OBJECTID)
993                         rec->found_link = 1;
994
995                 ret = insert_cache_extent(inode_cache, &node->cache);
996                 if (ret)
997                         return ERR_PTR(-EEXIST);
998         }
999         return rec;
1000 }
1001
1002 static void free_orphan_data_extents(struct list_head *orphan_extents)
1003 {
1004         struct orphan_data_extent *orphan;
1005
1006         while (!list_empty(orphan_extents)) {
1007                 orphan = list_entry(orphan_extents->next,
1008                                     struct orphan_data_extent, list);
1009                 list_del(&orphan->list);
1010                 free(orphan);
1011         }
1012 }
1013
1014 static void free_inode_rec(struct inode_record *rec)
1015 {
1016         struct inode_backref *backref;
1017
1018         if (--rec->refs > 0)
1019                 return;
1020
1021         while (!list_empty(&rec->backrefs)) {
1022                 backref = to_inode_backref(rec->backrefs.next);
1023                 list_del(&backref->list);
1024                 free(backref);
1025         }
1026         free_orphan_data_extents(&rec->orphan_extents);
1027         free_file_extent_holes(&rec->holes);
1028         free(rec);
1029 }
1030
1031 static int can_free_inode_rec(struct inode_record *rec)
1032 {
1033         if (!rec->errors && rec->checked && rec->found_inode_item &&
1034             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1035                 return 1;
1036         return 0;
1037 }
1038
1039 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1040                                  struct inode_record *rec)
1041 {
1042         struct cache_extent *cache;
1043         struct inode_backref *tmp, *backref;
1044         struct ptr_node *node;
1045         u8 filetype;
1046
1047         if (!rec->found_inode_item)
1048                 return;
1049
1050         filetype = imode_to_type(rec->imode);
1051         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1052                 if (backref->found_dir_item && backref->found_dir_index) {
1053                         if (backref->filetype != filetype)
1054                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1055                         if (!backref->errors && backref->found_inode_ref &&
1056                             rec->nlink == rec->found_link) {
1057                                 list_del(&backref->list);
1058                                 free(backref);
1059                         }
1060                 }
1061         }
1062
1063         if (!rec->checked || rec->merging)
1064                 return;
1065
1066         if (S_ISDIR(rec->imode)) {
1067                 if (rec->found_size != rec->isize)
1068                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1069                 if (rec->found_file_extent)
1070                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1071         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1072                 if (rec->found_dir_item)
1073                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1074                 if (rec->found_size != rec->nbytes)
1075                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1076                 if (rec->nlink > 0 && !no_holes &&
1077                     (rec->extent_end < rec->isize ||
1078                      first_extent_gap(&rec->holes) < rec->isize))
1079                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1080         }
1081
1082         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1083                 if (rec->found_csum_item && rec->nodatasum)
1084                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1085                 if (rec->some_csum_missing && !rec->nodatasum)
1086                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1087         }
1088
1089         BUG_ON(rec->refs != 1);
1090         if (can_free_inode_rec(rec)) {
1091                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1092                 node = container_of(cache, struct ptr_node, cache);
1093                 BUG_ON(node->data != rec);
1094                 remove_cache_extent(inode_cache, &node->cache);
1095                 free(node);
1096                 free_inode_rec(rec);
1097         }
1098 }
1099
1100 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1101 {
1102         struct btrfs_path path;
1103         struct btrfs_key key;
1104         int ret;
1105
1106         key.objectid = BTRFS_ORPHAN_OBJECTID;
1107         key.type = BTRFS_ORPHAN_ITEM_KEY;
1108         key.offset = ino;
1109
1110         btrfs_init_path(&path);
1111         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1112         btrfs_release_path(&path);
1113         if (ret > 0)
1114                 ret = -ENOENT;
1115         return ret;
1116 }
1117
1118 static int process_inode_item(struct extent_buffer *eb,
1119                               int slot, struct btrfs_key *key,
1120                               struct shared_node *active_node)
1121 {
1122         struct inode_record *rec;
1123         struct btrfs_inode_item *item;
1124
1125         rec = active_node->current;
1126         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1127         if (rec->found_inode_item) {
1128                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1129                 return 1;
1130         }
1131         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1132         rec->nlink = btrfs_inode_nlink(eb, item);
1133         rec->isize = btrfs_inode_size(eb, item);
1134         rec->nbytes = btrfs_inode_nbytes(eb, item);
1135         rec->imode = btrfs_inode_mode(eb, item);
1136         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1137                 rec->nodatasum = 1;
1138         rec->found_inode_item = 1;
1139         if (rec->nlink == 0)
1140                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1141         maybe_free_inode_rec(&active_node->inode_cache, rec);
1142         return 0;
1143 }
1144
1145 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1146                                                 const char *name,
1147                                                 int namelen, u64 dir)
1148 {
1149         struct inode_backref *backref;
1150
1151         list_for_each_entry(backref, &rec->backrefs, list) {
1152                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1153                         break;
1154                 if (backref->dir != dir || backref->namelen != namelen)
1155                         continue;
1156                 if (memcmp(name, backref->name, namelen))
1157                         continue;
1158                 return backref;
1159         }
1160
1161         backref = malloc(sizeof(*backref) + namelen + 1);
1162         if (!backref)
1163                 return NULL;
1164         memset(backref, 0, sizeof(*backref));
1165         backref->dir = dir;
1166         backref->namelen = namelen;
1167         memcpy(backref->name, name, namelen);
1168         backref->name[namelen] = '\0';
1169         list_add_tail(&backref->list, &rec->backrefs);
1170         return backref;
1171 }
1172
1173 static int add_inode_backref(struct cache_tree *inode_cache,
1174                              u64 ino, u64 dir, u64 index,
1175                              const char *name, int namelen,
1176                              u8 filetype, u8 itemtype, int errors)
1177 {
1178         struct inode_record *rec;
1179         struct inode_backref *backref;
1180
1181         rec = get_inode_rec(inode_cache, ino, 1);
1182         BUG_ON(IS_ERR(rec));
1183         backref = get_inode_backref(rec, name, namelen, dir);
1184         BUG_ON(!backref);
1185         if (errors)
1186                 backref->errors |= errors;
1187         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1188                 if (backref->found_dir_index)
1189                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1190                 if (backref->found_inode_ref && backref->index != index)
1191                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1192                 if (backref->found_dir_item && backref->filetype != filetype)
1193                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1194
1195                 backref->index = index;
1196                 backref->filetype = filetype;
1197                 backref->found_dir_index = 1;
1198         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1199                 rec->found_link++;
1200                 if (backref->found_dir_item)
1201                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1202                 if (backref->found_dir_index && backref->filetype != filetype)
1203                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1204
1205                 backref->filetype = filetype;
1206                 backref->found_dir_item = 1;
1207         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1208                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1209                 if (backref->found_inode_ref)
1210                         backref->errors |= REF_ERR_DUP_INODE_REF;
1211                 if (backref->found_dir_index && backref->index != index)
1212                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1213                 else
1214                         backref->index = index;
1215
1216                 backref->ref_type = itemtype;
1217                 backref->found_inode_ref = 1;
1218         } else {
1219                 BUG_ON(1);
1220         }
1221
1222         maybe_free_inode_rec(inode_cache, rec);
1223         return 0;
1224 }
1225
1226 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1227                             struct cache_tree *dst_cache)
1228 {
1229         struct inode_backref *backref;
1230         u32 dir_count = 0;
1231         int ret = 0;
1232
1233         dst->merging = 1;
1234         list_for_each_entry(backref, &src->backrefs, list) {
1235                 if (backref->found_dir_index) {
1236                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1237                                         backref->index, backref->name,
1238                                         backref->namelen, backref->filetype,
1239                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1240                 }
1241                 if (backref->found_dir_item) {
1242                         dir_count++;
1243                         add_inode_backref(dst_cache, dst->ino,
1244                                         backref->dir, 0, backref->name,
1245                                         backref->namelen, backref->filetype,
1246                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1247                 }
1248                 if (backref->found_inode_ref) {
1249                         add_inode_backref(dst_cache, dst->ino,
1250                                         backref->dir, backref->index,
1251                                         backref->name, backref->namelen, 0,
1252                                         backref->ref_type, backref->errors);
1253                 }
1254         }
1255
1256         if (src->found_dir_item)
1257                 dst->found_dir_item = 1;
1258         if (src->found_file_extent)
1259                 dst->found_file_extent = 1;
1260         if (src->found_csum_item)
1261                 dst->found_csum_item = 1;
1262         if (src->some_csum_missing)
1263                 dst->some_csum_missing = 1;
1264         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1265                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1266                 if (ret < 0)
1267                         return ret;
1268         }
1269
1270         BUG_ON(src->found_link < dir_count);
1271         dst->found_link += src->found_link - dir_count;
1272         dst->found_size += src->found_size;
1273         if (src->extent_start != (u64)-1) {
1274                 if (dst->extent_start == (u64)-1) {
1275                         dst->extent_start = src->extent_start;
1276                         dst->extent_end = src->extent_end;
1277                 } else {
1278                         if (dst->extent_end > src->extent_start)
1279                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1280                         else if (dst->extent_end < src->extent_start) {
1281                                 ret = add_file_extent_hole(&dst->holes,
1282                                         dst->extent_end,
1283                                         src->extent_start - dst->extent_end);
1284                         }
1285                         if (dst->extent_end < src->extent_end)
1286                                 dst->extent_end = src->extent_end;
1287                 }
1288         }
1289
1290         dst->errors |= src->errors;
1291         if (src->found_inode_item) {
1292                 if (!dst->found_inode_item) {
1293                         dst->nlink = src->nlink;
1294                         dst->isize = src->isize;
1295                         dst->nbytes = src->nbytes;
1296                         dst->imode = src->imode;
1297                         dst->nodatasum = src->nodatasum;
1298                         dst->found_inode_item = 1;
1299                 } else {
1300                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1301                 }
1302         }
1303         dst->merging = 0;
1304
1305         return 0;
1306 }
1307
1308 static int splice_shared_node(struct shared_node *src_node,
1309                               struct shared_node *dst_node)
1310 {
1311         struct cache_extent *cache;
1312         struct ptr_node *node, *ins;
1313         struct cache_tree *src, *dst;
1314         struct inode_record *rec, *conflict;
1315         u64 current_ino = 0;
1316         int splice = 0;
1317         int ret;
1318
1319         if (--src_node->refs == 0)
1320                 splice = 1;
1321         if (src_node->current)
1322                 current_ino = src_node->current->ino;
1323
1324         src = &src_node->root_cache;
1325         dst = &dst_node->root_cache;
1326 again:
1327         cache = search_cache_extent(src, 0);
1328         while (cache) {
1329                 node = container_of(cache, struct ptr_node, cache);
1330                 rec = node->data;
1331                 cache = next_cache_extent(cache);
1332
1333                 if (splice) {
1334                         remove_cache_extent(src, &node->cache);
1335                         ins = node;
1336                 } else {
1337                         ins = malloc(sizeof(*ins));
1338                         BUG_ON(!ins);
1339                         ins->cache.start = node->cache.start;
1340                         ins->cache.size = node->cache.size;
1341                         ins->data = rec;
1342                         rec->refs++;
1343                 }
1344                 ret = insert_cache_extent(dst, &ins->cache);
1345                 if (ret == -EEXIST) {
1346                         conflict = get_inode_rec(dst, rec->ino, 1);
1347                         BUG_ON(IS_ERR(conflict));
1348                         merge_inode_recs(rec, conflict, dst);
1349                         if (rec->checked) {
1350                                 conflict->checked = 1;
1351                                 if (dst_node->current == conflict)
1352                                         dst_node->current = NULL;
1353                         }
1354                         maybe_free_inode_rec(dst, conflict);
1355                         free_inode_rec(rec);
1356                         free(ins);
1357                 } else {
1358                         BUG_ON(ret);
1359                 }
1360         }
1361
1362         if (src == &src_node->root_cache) {
1363                 src = &src_node->inode_cache;
1364                 dst = &dst_node->inode_cache;
1365                 goto again;
1366         }
1367
1368         if (current_ino > 0 && (!dst_node->current ||
1369             current_ino > dst_node->current->ino)) {
1370                 if (dst_node->current) {
1371                         dst_node->current->checked = 1;
1372                         maybe_free_inode_rec(dst, dst_node->current);
1373                 }
1374                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1375                 BUG_ON(IS_ERR(dst_node->current));
1376         }
1377         return 0;
1378 }
1379
1380 static void free_inode_ptr(struct cache_extent *cache)
1381 {
1382         struct ptr_node *node;
1383         struct inode_record *rec;
1384
1385         node = container_of(cache, struct ptr_node, cache);
1386         rec = node->data;
1387         free_inode_rec(rec);
1388         free(node);
1389 }
1390
1391 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1392
1393 static struct shared_node *find_shared_node(struct cache_tree *shared,
1394                                             u64 bytenr)
1395 {
1396         struct cache_extent *cache;
1397         struct shared_node *node;
1398
1399         cache = lookup_cache_extent(shared, bytenr, 1);
1400         if (cache) {
1401                 node = container_of(cache, struct shared_node, cache);
1402                 return node;
1403         }
1404         return NULL;
1405 }
1406
1407 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1408 {
1409         int ret;
1410         struct shared_node *node;
1411
1412         node = calloc(1, sizeof(*node));
1413         if (!node)
1414                 return -ENOMEM;
1415         node->cache.start = bytenr;
1416         node->cache.size = 1;
1417         cache_tree_init(&node->root_cache);
1418         cache_tree_init(&node->inode_cache);
1419         node->refs = refs;
1420
1421         ret = insert_cache_extent(shared, &node->cache);
1422
1423         return ret;
1424 }
1425
1426 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1427                              struct walk_control *wc, int level)
1428 {
1429         struct shared_node *node;
1430         struct shared_node *dest;
1431         int ret;
1432
1433         if (level == wc->active_node)
1434                 return 0;
1435
1436         BUG_ON(wc->active_node <= level);
1437         node = find_shared_node(&wc->shared, bytenr);
1438         if (!node) {
1439                 ret = add_shared_node(&wc->shared, bytenr, refs);
1440                 BUG_ON(ret);
1441                 node = find_shared_node(&wc->shared, bytenr);
1442                 wc->nodes[level] = node;
1443                 wc->active_node = level;
1444                 return 0;
1445         }
1446
1447         if (wc->root_level == wc->active_node &&
1448             btrfs_root_refs(&root->root_item) == 0) {
1449                 if (--node->refs == 0) {
1450                         free_inode_recs_tree(&node->root_cache);
1451                         free_inode_recs_tree(&node->inode_cache);
1452                         remove_cache_extent(&wc->shared, &node->cache);
1453                         free(node);
1454                 }
1455                 return 1;
1456         }
1457
1458         dest = wc->nodes[wc->active_node];
1459         splice_shared_node(node, dest);
1460         if (node->refs == 0) {
1461                 remove_cache_extent(&wc->shared, &node->cache);
1462                 free(node);
1463         }
1464         return 1;
1465 }
1466
1467 static int leave_shared_node(struct btrfs_root *root,
1468                              struct walk_control *wc, int level)
1469 {
1470         struct shared_node *node;
1471         struct shared_node *dest;
1472         int i;
1473
1474         if (level == wc->root_level)
1475                 return 0;
1476
1477         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1478                 if (wc->nodes[i])
1479                         break;
1480         }
1481         BUG_ON(i >= BTRFS_MAX_LEVEL);
1482
1483         node = wc->nodes[wc->active_node];
1484         wc->nodes[wc->active_node] = NULL;
1485         wc->active_node = i;
1486
1487         dest = wc->nodes[wc->active_node];
1488         if (wc->active_node < wc->root_level ||
1489             btrfs_root_refs(&root->root_item) > 0) {
1490                 BUG_ON(node->refs <= 1);
1491                 splice_shared_node(node, dest);
1492         } else {
1493                 BUG_ON(node->refs < 2);
1494                 node->refs--;
1495         }
1496         return 0;
1497 }
1498
1499 /*
1500  * Returns:
1501  * < 0 - on error
1502  * 1   - if the root with id child_root_id is a child of root parent_root_id
1503  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1504  *       has other root(s) as parent(s)
1505  * 2   - if the root child_root_id doesn't have any parent roots
1506  */
1507 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1508                          u64 child_root_id)
1509 {
1510         struct btrfs_path path;
1511         struct btrfs_key key;
1512         struct extent_buffer *leaf;
1513         int has_parent = 0;
1514         int ret;
1515
1516         btrfs_init_path(&path);
1517
1518         key.objectid = parent_root_id;
1519         key.type = BTRFS_ROOT_REF_KEY;
1520         key.offset = child_root_id;
1521         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1522                                 0, 0);
1523         if (ret < 0)
1524                 return ret;
1525         btrfs_release_path(&path);
1526         if (!ret)
1527                 return 1;
1528
1529         key.objectid = child_root_id;
1530         key.type = BTRFS_ROOT_BACKREF_KEY;
1531         key.offset = 0;
1532         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1533                                 0, 0);
1534         if (ret < 0)
1535                 goto out;
1536
1537         while (1) {
1538                 leaf = path.nodes[0];
1539                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1540                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1541                         if (ret)
1542                                 break;
1543                         leaf = path.nodes[0];
1544                 }
1545
1546                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1547                 if (key.objectid != child_root_id ||
1548                     key.type != BTRFS_ROOT_BACKREF_KEY)
1549                         break;
1550
1551                 has_parent = 1;
1552
1553                 if (key.offset == parent_root_id) {
1554                         btrfs_release_path(&path);
1555                         return 1;
1556                 }
1557
1558                 path.slots[0]++;
1559         }
1560 out:
1561         btrfs_release_path(&path);
1562         if (ret < 0)
1563                 return ret;
1564         return has_parent ? 0 : 2;
1565 }
1566
1567 static int process_dir_item(struct extent_buffer *eb,
1568                             int slot, struct btrfs_key *key,
1569                             struct shared_node *active_node)
1570 {
1571         u32 total;
1572         u32 cur = 0;
1573         u32 len;
1574         u32 name_len;
1575         u32 data_len;
1576         int error;
1577         int nritems = 0;
1578         u8 filetype;
1579         struct btrfs_dir_item *di;
1580         struct inode_record *rec;
1581         struct cache_tree *root_cache;
1582         struct cache_tree *inode_cache;
1583         struct btrfs_key location;
1584         char namebuf[BTRFS_NAME_LEN];
1585
1586         root_cache = &active_node->root_cache;
1587         inode_cache = &active_node->inode_cache;
1588         rec = active_node->current;
1589         rec->found_dir_item = 1;
1590
1591         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1592         total = btrfs_item_size_nr(eb, slot);
1593         while (cur < total) {
1594                 nritems++;
1595                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1596                 name_len = btrfs_dir_name_len(eb, di);
1597                 data_len = btrfs_dir_data_len(eb, di);
1598                 filetype = btrfs_dir_type(eb, di);
1599
1600                 rec->found_size += name_len;
1601                 if (cur + sizeof(*di) + name_len > total ||
1602                     name_len > BTRFS_NAME_LEN) {
1603                         error = REF_ERR_NAME_TOO_LONG;
1604
1605                         if (cur + sizeof(*di) > total)
1606                                 break;
1607                         len = min_t(u32, total - cur - sizeof(*di),
1608                                     BTRFS_NAME_LEN);
1609                 } else {
1610                         len = name_len;
1611                         error = 0;
1612                 }
1613
1614                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1615
1616                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1617                     key->offset != btrfs_name_hash(namebuf, len)) {
1618                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1619                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1620                         key->objectid, key->offset, namebuf, len, filetype,
1621                         key->offset, btrfs_name_hash(namebuf, len));
1622                 }
1623
1624                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1625                         add_inode_backref(inode_cache, location.objectid,
1626                                           key->objectid, key->offset, namebuf,
1627                                           len, filetype, key->type, error);
1628                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1629                         add_inode_backref(root_cache, location.objectid,
1630                                           key->objectid, key->offset,
1631                                           namebuf, len, filetype,
1632                                           key->type, error);
1633                 } else {
1634                         fprintf(stderr, "invalid location in dir item %u\n",
1635                                 location.type);
1636                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1637                                           key->objectid, key->offset, namebuf,
1638                                           len, filetype, key->type, error);
1639                 }
1640
1641                 len = sizeof(*di) + name_len + data_len;
1642                 di = (struct btrfs_dir_item *)((char *)di + len);
1643                 cur += len;
1644         }
1645         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1646                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1647
1648         return 0;
1649 }
1650
1651 static int process_inode_ref(struct extent_buffer *eb,
1652                              int slot, struct btrfs_key *key,
1653                              struct shared_node *active_node)
1654 {
1655         u32 total;
1656         u32 cur = 0;
1657         u32 len;
1658         u32 name_len;
1659         u64 index;
1660         int error;
1661         struct cache_tree *inode_cache;
1662         struct btrfs_inode_ref *ref;
1663         char namebuf[BTRFS_NAME_LEN];
1664
1665         inode_cache = &active_node->inode_cache;
1666
1667         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1668         total = btrfs_item_size_nr(eb, slot);
1669         while (cur < total) {
1670                 name_len = btrfs_inode_ref_name_len(eb, ref);
1671                 index = btrfs_inode_ref_index(eb, ref);
1672
1673                 /* inode_ref + namelen should not cross item boundary */
1674                 if (cur + sizeof(*ref) + name_len > total ||
1675                     name_len > BTRFS_NAME_LEN) {
1676                         if (total < cur + sizeof(*ref))
1677                                 break;
1678
1679                         /* Still try to read out the remaining part */
1680                         len = min_t(u32, total - cur - sizeof(*ref),
1681                                     BTRFS_NAME_LEN);
1682                         error = REF_ERR_NAME_TOO_LONG;
1683                 } else {
1684                         len = name_len;
1685                         error = 0;
1686                 }
1687
1688                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1689                 add_inode_backref(inode_cache, key->objectid, key->offset,
1690                                   index, namebuf, len, 0, key->type, error);
1691
1692                 len = sizeof(*ref) + name_len;
1693                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1694                 cur += len;
1695         }
1696         return 0;
1697 }
1698
1699 static int process_inode_extref(struct extent_buffer *eb,
1700                                 int slot, struct btrfs_key *key,
1701                                 struct shared_node *active_node)
1702 {
1703         u32 total;
1704         u32 cur = 0;
1705         u32 len;
1706         u32 name_len;
1707         u64 index;
1708         u64 parent;
1709         int error;
1710         struct cache_tree *inode_cache;
1711         struct btrfs_inode_extref *extref;
1712         char namebuf[BTRFS_NAME_LEN];
1713
1714         inode_cache = &active_node->inode_cache;
1715
1716         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1717         total = btrfs_item_size_nr(eb, slot);
1718         while (cur < total) {
1719                 name_len = btrfs_inode_extref_name_len(eb, extref);
1720                 index = btrfs_inode_extref_index(eb, extref);
1721                 parent = btrfs_inode_extref_parent(eb, extref);
1722                 if (name_len <= BTRFS_NAME_LEN) {
1723                         len = name_len;
1724                         error = 0;
1725                 } else {
1726                         len = BTRFS_NAME_LEN;
1727                         error = REF_ERR_NAME_TOO_LONG;
1728                 }
1729                 read_extent_buffer(eb, namebuf,
1730                                    (unsigned long)(extref + 1), len);
1731                 add_inode_backref(inode_cache, key->objectid, parent,
1732                                   index, namebuf, len, 0, key->type, error);
1733
1734                 len = sizeof(*extref) + name_len;
1735                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1736                 cur += len;
1737         }
1738         return 0;
1739
1740 }
1741
1742 static int count_csum_range(struct btrfs_root *root, u64 start,
1743                             u64 len, u64 *found)
1744 {
1745         struct btrfs_key key;
1746         struct btrfs_path path;
1747         struct extent_buffer *leaf;
1748         int ret;
1749         size_t size;
1750         *found = 0;
1751         u64 csum_end;
1752         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1753
1754         btrfs_init_path(&path);
1755
1756         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1757         key.offset = start;
1758         key.type = BTRFS_EXTENT_CSUM_KEY;
1759
1760         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1761                                 &key, &path, 0, 0);
1762         if (ret < 0)
1763                 goto out;
1764         if (ret > 0 && path.slots[0] > 0) {
1765                 leaf = path.nodes[0];
1766                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1767                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1768                     key.type == BTRFS_EXTENT_CSUM_KEY)
1769                         path.slots[0]--;
1770         }
1771
1772         while (len > 0) {
1773                 leaf = path.nodes[0];
1774                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1775                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1776                         if (ret > 0)
1777                                 break;
1778                         else if (ret < 0)
1779                                 goto out;
1780                         leaf = path.nodes[0];
1781                 }
1782
1783                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1784                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1785                     key.type != BTRFS_EXTENT_CSUM_KEY)
1786                         break;
1787
1788                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1789                 if (key.offset >= start + len)
1790                         break;
1791
1792                 if (key.offset > start)
1793                         start = key.offset;
1794
1795                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1796                 csum_end = key.offset + (size / csum_size) *
1797                            root->fs_info->sectorsize;
1798                 if (csum_end > start) {
1799                         size = min(csum_end - start, len);
1800                         len -= size;
1801                         start += size;
1802                         *found += size;
1803                 }
1804
1805                 path.slots[0]++;
1806         }
1807 out:
1808         btrfs_release_path(&path);
1809         if (ret < 0)
1810                 return ret;
1811         return 0;
1812 }
1813
1814 static int process_file_extent(struct btrfs_root *root,
1815                                 struct extent_buffer *eb,
1816                                 int slot, struct btrfs_key *key,
1817                                 struct shared_node *active_node)
1818 {
1819         struct inode_record *rec;
1820         struct btrfs_file_extent_item *fi;
1821         u64 num_bytes = 0;
1822         u64 disk_bytenr = 0;
1823         u64 extent_offset = 0;
1824         u64 mask = root->fs_info->sectorsize - 1;
1825         int extent_type;
1826         int ret;
1827
1828         rec = active_node->current;
1829         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1830         rec->found_file_extent = 1;
1831
1832         if (rec->extent_start == (u64)-1) {
1833                 rec->extent_start = key->offset;
1834                 rec->extent_end = key->offset;
1835         }
1836
1837         if (rec->extent_end > key->offset)
1838                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1839         else if (rec->extent_end < key->offset) {
1840                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1841                                            key->offset - rec->extent_end);
1842                 if (ret < 0)
1843                         return ret;
1844         }
1845
1846         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1847         extent_type = btrfs_file_extent_type(eb, fi);
1848
1849         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1850                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1851                 if (num_bytes == 0)
1852                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1853                 rec->found_size += num_bytes;
1854                 num_bytes = (num_bytes + mask) & ~mask;
1855         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1856                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1857                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1858                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1859                 extent_offset = btrfs_file_extent_offset(eb, fi);
1860                 if (num_bytes == 0 || (num_bytes & mask))
1861                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1862                 if (num_bytes + extent_offset >
1863                     btrfs_file_extent_ram_bytes(eb, fi))
1864                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1865                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1866                     (btrfs_file_extent_compression(eb, fi) ||
1867                      btrfs_file_extent_encryption(eb, fi) ||
1868                      btrfs_file_extent_other_encoding(eb, fi)))
1869                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1870                 if (disk_bytenr > 0)
1871                         rec->found_size += num_bytes;
1872         } else {
1873                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1874         }
1875         rec->extent_end = key->offset + num_bytes;
1876
1877         /*
1878          * The data reloc tree will copy full extents into its inode and then
1879          * copy the corresponding csums.  Because the extent it copied could be
1880          * a preallocated extent that hasn't been written to yet there may be no
1881          * csums to copy, ergo we won't have csums for our file extent.  This is
1882          * ok so just don't bother checking csums if the inode belongs to the
1883          * data reloc tree.
1884          */
1885         if (disk_bytenr > 0 &&
1886             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1887                 u64 found;
1888                 if (btrfs_file_extent_compression(eb, fi))
1889                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1890                 else
1891                         disk_bytenr += extent_offset;
1892
1893                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1894                 if (ret < 0)
1895                         return ret;
1896                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1897                         if (found > 0)
1898                                 rec->found_csum_item = 1;
1899                         if (found < num_bytes)
1900                                 rec->some_csum_missing = 1;
1901                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1902                         if (found > 0)
1903                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1904                 }
1905         }
1906         return 0;
1907 }
1908
1909 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1910                             struct walk_control *wc)
1911 {
1912         struct btrfs_key key;
1913         u32 nritems;
1914         int i;
1915         int ret = 0;
1916         struct cache_tree *inode_cache;
1917         struct shared_node *active_node;
1918
1919         if (wc->root_level == wc->active_node &&
1920             btrfs_root_refs(&root->root_item) == 0)
1921                 return 0;
1922
1923         active_node = wc->nodes[wc->active_node];
1924         inode_cache = &active_node->inode_cache;
1925         nritems = btrfs_header_nritems(eb);
1926         for (i = 0; i < nritems; i++) {
1927                 btrfs_item_key_to_cpu(eb, &key, i);
1928
1929                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1930                         continue;
1931                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1932                         continue;
1933
1934                 if (active_node->current == NULL ||
1935                     active_node->current->ino < key.objectid) {
1936                         if (active_node->current) {
1937                                 active_node->current->checked = 1;
1938                                 maybe_free_inode_rec(inode_cache,
1939                                                      active_node->current);
1940                         }
1941                         active_node->current = get_inode_rec(inode_cache,
1942                                                              key.objectid, 1);
1943                         BUG_ON(IS_ERR(active_node->current));
1944                 }
1945                 switch (key.type) {
1946                 case BTRFS_DIR_ITEM_KEY:
1947                 case BTRFS_DIR_INDEX_KEY:
1948                         ret = process_dir_item(eb, i, &key, active_node);
1949                         break;
1950                 case BTRFS_INODE_REF_KEY:
1951                         ret = process_inode_ref(eb, i, &key, active_node);
1952                         break;
1953                 case BTRFS_INODE_EXTREF_KEY:
1954                         ret = process_inode_extref(eb, i, &key, active_node);
1955                         break;
1956                 case BTRFS_INODE_ITEM_KEY:
1957                         ret = process_inode_item(eb, i, &key, active_node);
1958                         break;
1959                 case BTRFS_EXTENT_DATA_KEY:
1960                         ret = process_file_extent(root, eb, i, &key,
1961                                                   active_node);
1962                         break;
1963                 default:
1964                         break;
1965                 };
1966         }
1967         return ret;
1968 }
1969
/*
 * Per-level cache of extent information for the tree blocks met while
 * walking a tree, indexed by tree level.  It lets the walkers avoid looking
 * up the same block twice in a row.
 */
struct node_refs {
        /* bytenr of the block whose information is cached for each level */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count of that block, from btrfs_lookup_extent_info() */
        u64 refs[BTRFS_MAX_LEVEL];
        /*
         * Non-zero if the block has to be checked through the current root,
         * zero if it is shared and left to another root (see need_check())
         */
        int need_check[BTRFS_MAX_LEVEL];
};

/* Forward declarations for helpers used by process_one_leaf_v2() */
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
                             struct node_refs *nrefs, u64 level);
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
1980
1981 /*
1982  * Returns >0  Found error, not fatal, should continue
1983  * Returns <0  Fatal error, must exit the whole check
1984  * Returns 0   No errors found
1985  */
1986 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1987                                struct node_refs *nrefs, int *level, int ext_ref)
1988 {
1989         struct extent_buffer *cur = path->nodes[0];
1990         struct btrfs_key key;
1991         u64 cur_bytenr;
1992         u32 nritems;
1993         u64 first_ino = 0;
1994         int root_level = btrfs_header_level(root->node);
1995         int i;
1996         int ret = 0; /* Final return value */
1997         int err = 0; /* Positive error bitmap */
1998
1999         cur_bytenr = cur->start;
2000
2001         /* skip to first inode item or the first inode number change */
2002         nritems = btrfs_header_nritems(cur);
2003         for (i = 0; i < nritems; i++) {
2004                 btrfs_item_key_to_cpu(cur, &key, i);
2005                 if (i == 0)
2006                         first_ino = key.objectid;
2007                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2008                     (first_ino && first_ino != key.objectid))
2009                         break;
2010         }
2011         if (i == nritems) {
2012                 path->slots[0] = nritems;
2013                 return 0;
2014         }
2015         path->slots[0] = i;
2016
2017 again:
2018         err |= check_inode_item(root, path, ext_ref);
2019
2020         /* modify cur since check_inode_item may change path */
2021         cur = path->nodes[0];
2022
2023         if (err & LAST_ITEM)
2024                 goto out;
2025
2026         /* still have inode items in thie leaf */
2027         if (cur->start == cur_bytenr)
2028                 goto again;
2029
2030         /*
2031          * we have switched to another leaf, above nodes may
2032          * have changed, here walk down the path, if a node
2033          * or leaf is shared, check whether we can skip this
2034          * node or leaf.
2035          */
2036         for (i = root_level; i >= 0; i--) {
2037                 if (path->nodes[i]->start == nrefs->bytenr[i])
2038                         continue;
2039
2040                 ret = update_nodes_refs(root,
2041                                 path->nodes[i]->start,
2042                                 nrefs, i);
2043                 if (ret)
2044                         goto out;
2045
2046                 if (!nrefs->need_check[i]) {
2047                         *level += 1;
2048                         break;
2049                 }
2050         }
2051
2052         for (i = 0; i < *level; i++) {
2053                 free_extent_buffer(path->nodes[i]);
2054                 path->nodes[i] = NULL;
2055         }
2056 out:
2057         err &= ~LAST_ITEM;
2058         if (err && !ret)
2059                 ret = err;
2060         return ret;
2061 }
2062
2063 static void reada_walk_down(struct btrfs_root *root,
2064                             struct extent_buffer *node, int slot)
2065 {
2066         struct btrfs_fs_info *fs_info = root->fs_info;
2067         u64 bytenr;
2068         u64 ptr_gen;
2069         u32 nritems;
2070         int i;
2071         int level;
2072
2073         level = btrfs_header_level(node);
2074         if (level != 1)
2075                 return;
2076
2077         nritems = btrfs_header_nritems(node);
2078         for (i = slot; i < nritems; i++) {
2079                 bytenr = btrfs_node_blockptr(node, i);
2080                 ptr_gen = btrfs_node_ptr_generation(node, i);
2081                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2082         }
2083 }
2084
2085 /*
2086  * Check the child node/leaf by the following condition:
2087  * 1. the first item key of the node/leaf should be the same with the one
2088  *    in parent.
2089  * 2. block in parent node should match the child node/leaf.
2090  * 3. generation of parent node and child's header should be consistent.
2091  *
2092  * Or the child node/leaf pointed by the key in parent is not valid.
2093  *
2094  * We hope to check leaf owner too, but since subvol may share leaves,
2095  * which makes leaf owner check not so strong, key check should be
2096  * sufficient enough for that case.
2097  */
2098 static int check_child_node(struct extent_buffer *parent, int slot,
2099                             struct extent_buffer *child)
2100 {
2101         struct btrfs_key parent_key;
2102         struct btrfs_key child_key;
2103         int ret = 0;
2104
2105         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2106         if (btrfs_header_level(child) == 0)
2107                 btrfs_item_key_to_cpu(child, &child_key, 0);
2108         else
2109                 btrfs_node_key_to_cpu(child, &child_key, 0);
2110
2111         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2112                 ret = -EINVAL;
2113                 fprintf(stderr,
2114                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2115                         parent_key.objectid, parent_key.type, parent_key.offset,
2116                         child_key.objectid, child_key.type, child_key.offset);
2117         }
2118         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2119                 ret = -EINVAL;
2120                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2121                         btrfs_node_blockptr(parent, slot),
2122                         btrfs_header_bytenr(child));
2123         }
2124         if (btrfs_node_ptr_generation(parent, slot) !=
2125             btrfs_header_generation(child)) {
2126                 ret = -EINVAL;
2127                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2128                         btrfs_header_generation(child),
2129                         btrfs_node_ptr_generation(parent, slot));
2130         }
2131         return ret;
2132 }
2133
2134 /*
2135  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2136  * in every fs or file tree check. Here we find its all root ids, and only check
2137  * it in the fs or file tree which has the smallest root id.
2138  */
2139 static int need_check(struct btrfs_root *root, struct ulist *roots)
2140 {
2141         struct rb_node *node;
2142         struct ulist_node *u;
2143
2144         if (roots->nnodes == 1)
2145                 return 1;
2146
2147         node = rb_first(&roots->root);
2148         u = rb_entry(node, struct ulist_node, rb_node);
2149         /*
2150          * current root id is not smallest, we skip it and let it be checked
2151          * in the fs or file tree who hash the smallest root id.
2152          */
2153         if (root->objectid != u->val)
2154                 return 0;
2155
2156         return 1;
2157 }
2158
2159 /*
2160  * for a tree node or leaf, we record its reference count, so later if we still
2161  * process this node or leaf, don't need to compute its reference count again.
2162  */
2163 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2164                              struct node_refs *nrefs, u64 level)
2165 {
2166         int check, ret;
2167         u64 refs;
2168         struct ulist *roots;
2169
2170         if (nrefs->bytenr[level] != bytenr) {
2171                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2172                                        level, 1, &refs, NULL);
2173                 if (ret < 0)
2174                         return ret;
2175
2176                 nrefs->bytenr[level] = bytenr;
2177                 nrefs->refs[level] = refs;
2178                 if (refs > 1) {
2179                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2180                                                    0, &roots);
2181                         if (ret)
2182                                 return -EIO;
2183
2184                         check = need_check(root, roots);
2185                         ulist_free(roots);
2186                         nrefs->need_check[level] = check;
2187                 } else {
2188                         nrefs->need_check[level] = 1;
2189                 }
2190         }
2191
2192         return 0;
2193 }
2194
2195 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2196                           struct walk_control *wc, int *level,
2197                           struct node_refs *nrefs)
2198 {
2199         enum btrfs_tree_block_status status;
2200         u64 bytenr;
2201         u64 ptr_gen;
2202         struct btrfs_fs_info *fs_info = root->fs_info;
2203         struct extent_buffer *next;
2204         struct extent_buffer *cur;
2205         int ret, err = 0;
2206         u64 refs;
2207
2208         WARN_ON(*level < 0);
2209         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2210
2211         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2212                 refs = nrefs->refs[*level];
2213                 ret = 0;
2214         } else {
2215                 ret = btrfs_lookup_extent_info(NULL, root,
2216                                        path->nodes[*level]->start,
2217                                        *level, 1, &refs, NULL);
2218                 if (ret < 0) {
2219                         err = ret;
2220                         goto out;
2221                 }
2222                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2223                 nrefs->refs[*level] = refs;
2224         }
2225
2226         if (refs > 1) {
2227                 ret = enter_shared_node(root, path->nodes[*level]->start,
2228                                         refs, wc, *level);
2229                 if (ret > 0) {
2230                         err = ret;
2231                         goto out;
2232                 }
2233         }
2234
2235         while (*level >= 0) {
2236                 WARN_ON(*level < 0);
2237                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2238                 cur = path->nodes[*level];
2239
2240                 if (btrfs_header_level(cur) != *level)
2241                         WARN_ON(1);
2242
2243                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2244                         break;
2245                 if (*level == 0) {
2246                         ret = process_one_leaf(root, cur, wc);
2247                         if (ret < 0)
2248                                 err = ret;
2249                         break;
2250                 }
2251                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2252                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2253
2254                 if (bytenr == nrefs->bytenr[*level - 1]) {
2255                         refs = nrefs->refs[*level - 1];
2256                 } else {
2257                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2258                                         *level - 1, 1, &refs, NULL);
2259                         if (ret < 0) {
2260                                 refs = 0;
2261                         } else {
2262                                 nrefs->bytenr[*level - 1] = bytenr;
2263                                 nrefs->refs[*level - 1] = refs;
2264                         }
2265                 }
2266
2267                 if (refs > 1) {
2268                         ret = enter_shared_node(root, bytenr, refs,
2269                                                 wc, *level - 1);
2270                         if (ret > 0) {
2271                                 path->slots[*level]++;
2272                                 continue;
2273                         }
2274                 }
2275
2276                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2277                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2278                         free_extent_buffer(next);
2279                         reada_walk_down(root, cur, path->slots[*level]);
2280                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2281                         if (!extent_buffer_uptodate(next)) {
2282                                 struct btrfs_key node_key;
2283
2284                                 btrfs_node_key_to_cpu(path->nodes[*level],
2285                                                       &node_key,
2286                                                       path->slots[*level]);
2287                                 btrfs_add_corrupt_extent_record(root->fs_info,
2288                                                 &node_key,
2289                                                 path->nodes[*level]->start,
2290                                                 root->fs_info->nodesize,
2291                                                 *level);
2292                                 err = -EIO;
2293                                 goto out;
2294                         }
2295                 }
2296
2297                 ret = check_child_node(cur, path->slots[*level], next);
2298                 if (ret) {
2299                         free_extent_buffer(next);
2300                         err = ret;
2301                         goto out;
2302                 }
2303
2304                 if (btrfs_is_leaf(next))
2305                         status = btrfs_check_leaf(root, NULL, next);
2306                 else
2307                         status = btrfs_check_node(root, NULL, next);
2308                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309                         free_extent_buffer(next);
2310                         err = -EIO;
2311                         goto out;
2312                 }
2313
2314                 *level = *level - 1;
2315                 free_extent_buffer(path->nodes[*level]);
2316                 path->nodes[*level] = next;
2317                 path->slots[*level] = 0;
2318         }
2319 out:
2320         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2321         return err;
2322 }
2323
2324 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2325                             unsigned int ext_ref);
2326
2327 /*
2328  * Returns >0  Found error, should continue
2329  * Returns <0  Fatal error, must exit the whole check
2330  * Returns 0   No errors found
2331  */
2332 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2333                              int *level, struct node_refs *nrefs, int ext_ref)
2334 {
2335         enum btrfs_tree_block_status status;
2336         u64 bytenr;
2337         u64 ptr_gen;
2338         struct btrfs_fs_info *fs_info = root->fs_info;
2339         struct extent_buffer *next;
2340         struct extent_buffer *cur;
2341         int ret;
2342
2343         WARN_ON(*level < 0);
2344         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2345
2346         ret = update_nodes_refs(root, path->nodes[*level]->start,
2347                                 nrefs, *level);
2348         if (ret < 0)
2349                 return ret;
2350
2351         while (*level >= 0) {
2352                 WARN_ON(*level < 0);
2353                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2354                 cur = path->nodes[*level];
2355
2356                 if (btrfs_header_level(cur) != *level)
2357                         WARN_ON(1);
2358
2359                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2360                         break;
2361                 /* Don't forgot to check leaf/node validation */
2362                 if (*level == 0) {
2363                         ret = btrfs_check_leaf(root, NULL, cur);
2364                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2365                                 ret = -EIO;
2366                                 break;
2367                         }
2368                         ret = process_one_leaf_v2(root, path, nrefs,
2369                                                   level, ext_ref);
2370                         cur = path->nodes[*level];
2371                         break;
2372                 } else {
2373                         ret = btrfs_check_node(root, NULL, cur);
2374                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2375                                 ret = -EIO;
2376                                 break;
2377                         }
2378                 }
2379                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2380                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2381
2382                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2383                 if (ret)
2384                         break;
2385                 if (!nrefs->need_check[*level - 1]) {
2386                         path->slots[*level]++;
2387                         continue;
2388                 }
2389
2390                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2391                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2392                         free_extent_buffer(next);
2393                         reada_walk_down(root, cur, path->slots[*level]);
2394                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2395                         if (!extent_buffer_uptodate(next)) {
2396                                 struct btrfs_key node_key;
2397
2398                                 btrfs_node_key_to_cpu(path->nodes[*level],
2399                                                       &node_key,
2400                                                       path->slots[*level]);
2401                                 btrfs_add_corrupt_extent_record(fs_info,
2402                                                 &node_key,
2403                                                 path->nodes[*level]->start,
2404                                                 fs_info->nodesize,
2405                                                 *level);
2406                                 ret = -EIO;
2407                                 break;
2408                         }
2409                 }
2410
2411                 ret = check_child_node(cur, path->slots[*level], next);
2412                 if (ret < 0) 
2413                         break;
2414
2415                 if (btrfs_is_leaf(next))
2416                         status = btrfs_check_leaf(root, NULL, next);
2417                 else
2418                         status = btrfs_check_node(root, NULL, next);
2419                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2420                         free_extent_buffer(next);
2421                         ret = -EIO;
2422                         break;
2423                 }
2424
2425                 *level = *level - 1;
2426                 free_extent_buffer(path->nodes[*level]);
2427                 path->nodes[*level] = next;
2428                 path->slots[*level] = 0;
2429         }
2430         return ret;
2431 }
2432
2433 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2434                         struct walk_control *wc, int *level)
2435 {
2436         int i;
2437         struct extent_buffer *leaf;
2438
2439         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2440                 leaf = path->nodes[i];
2441                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2442                         path->slots[i]++;
2443                         *level = i;
2444                         return 0;
2445                 } else {
2446                         free_extent_buffer(path->nodes[*level]);
2447                         path->nodes[*level] = NULL;
2448                         BUG_ON(*level > wc->active_node);
2449                         if (*level == wc->active_node)
2450                                 leave_shared_node(root, wc, *level);
2451                         *level = i + 1;
2452                 }
2453         }
2454         return 1;
2455 }
2456
2457 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2458                            int *level)
2459 {
2460         int i;
2461         struct extent_buffer *leaf;
2462
2463         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2464                 leaf = path->nodes[i];
2465                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2466                         path->slots[i]++;
2467                         *level = i;
2468                         return 0;
2469                 } else {
2470                         free_extent_buffer(path->nodes[*level]);
2471                         path->nodes[*level] = NULL;
2472                         *level = i + 1;
2473                 }
2474         }
2475         return 1;
2476 }
2477
2478 static int check_root_dir(struct inode_record *rec)
2479 {
2480         struct inode_backref *backref;
2481         int ret = -1;
2482
2483         if (!rec->found_inode_item || rec->errors)
2484                 goto out;
2485         if (rec->nlink != 1 || rec->found_link != 0)
2486                 goto out;
2487         if (list_empty(&rec->backrefs))
2488                 goto out;
2489         backref = to_inode_backref(rec->backrefs.next);
2490         if (!backref->found_inode_ref)
2491                 goto out;
2492         if (backref->index != 0 || backref->namelen != 2 ||
2493             memcmp(backref->name, "..", 2))
2494                 goto out;
2495         if (backref->found_dir_index || backref->found_dir_item)
2496                 goto out;
2497         ret = 0;
2498 out:
2499         return ret;
2500 }
2501
2502 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2503                               struct btrfs_root *root, struct btrfs_path *path,
2504                               struct inode_record *rec)
2505 {
2506         struct btrfs_inode_item *ei;
2507         struct btrfs_key key;
2508         int ret;
2509
2510         key.objectid = rec->ino;
2511         key.type = BTRFS_INODE_ITEM_KEY;
2512         key.offset = (u64)-1;
2513
2514         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2515         if (ret < 0)
2516                 goto out;
2517         if (ret) {
2518                 if (!path->slots[0]) {
2519                         ret = -ENOENT;
2520                         goto out;
2521                 }
2522                 path->slots[0]--;
2523                 ret = 0;
2524         }
2525         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2526         if (key.objectid != rec->ino) {
2527                 ret = -ENOENT;
2528                 goto out;
2529         }
2530
2531         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2532                             struct btrfs_inode_item);
2533         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2534         btrfs_mark_buffer_dirty(path->nodes[0]);
2535         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2536         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2537                root->root_key.objectid);
2538 out:
2539         btrfs_release_path(path);
2540         return ret;
2541 }
2542
2543 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2544                                     struct btrfs_root *root,
2545                                     struct btrfs_path *path,
2546                                     struct inode_record *rec)
2547 {
2548         int ret;
2549
2550         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2551         btrfs_release_path(path);
2552         if (!ret)
2553                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2554         return ret;
2555 }
2556
2557 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2558                                struct btrfs_root *root,
2559                                struct btrfs_path *path,
2560                                struct inode_record *rec)
2561 {
2562         struct btrfs_inode_item *ei;
2563         struct btrfs_key key;
2564         int ret = 0;
2565
2566         key.objectid = rec->ino;
2567         key.type = BTRFS_INODE_ITEM_KEY;
2568         key.offset = 0;
2569
2570         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2571         if (ret) {
2572                 if (ret > 0)
2573                         ret = -ENOENT;
2574                 goto out;
2575         }
2576
2577         /* Since ret == 0, no need to check anything */
2578         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2579                             struct btrfs_inode_item);
2580         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2581         btrfs_mark_buffer_dirty(path->nodes[0]);
2582         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2583         printf("reset nbytes for ino %llu root %llu\n",
2584                rec->ino, root->root_key.objectid);
2585 out:
2586         btrfs_release_path(path);
2587         return ret;
2588 }
2589
2590 static int add_missing_dir_index(struct btrfs_root *root,
2591                                  struct cache_tree *inode_cache,
2592                                  struct inode_record *rec,
2593                                  struct inode_backref *backref)
2594 {
2595         struct btrfs_path path;
2596         struct btrfs_trans_handle *trans;
2597         struct btrfs_dir_item *dir_item;
2598         struct extent_buffer *leaf;
2599         struct btrfs_key key;
2600         struct btrfs_disk_key disk_key;
2601         struct inode_record *dir_rec;
2602         unsigned long name_ptr;
2603         u32 data_size = sizeof(*dir_item) + backref->namelen;
2604         int ret;
2605
2606         trans = btrfs_start_transaction(root, 1);
2607         if (IS_ERR(trans))
2608                 return PTR_ERR(trans);
2609
2610         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2611                 (unsigned long long)rec->ino);
2612
2613         btrfs_init_path(&path);
2614         key.objectid = backref->dir;
2615         key.type = BTRFS_DIR_INDEX_KEY;
2616         key.offset = backref->index;
2617         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2618         BUG_ON(ret);
2619
2620         leaf = path.nodes[0];
2621         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2622
2623         disk_key.objectid = cpu_to_le64(rec->ino);
2624         disk_key.type = BTRFS_INODE_ITEM_KEY;
2625         disk_key.offset = 0;
2626
2627         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2628         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2629         btrfs_set_dir_data_len(leaf, dir_item, 0);
2630         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2631         name_ptr = (unsigned long)(dir_item + 1);
2632         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2633         btrfs_mark_buffer_dirty(leaf);
2634         btrfs_release_path(&path);
2635         btrfs_commit_transaction(trans, root);
2636
2637         backref->found_dir_index = 1;
2638         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2639         BUG_ON(IS_ERR(dir_rec));
2640         if (!dir_rec)
2641                 return 0;
2642         dir_rec->found_size += backref->namelen;
2643         if (dir_rec->found_size == dir_rec->isize &&
2644             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2645                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2646         if (dir_rec->found_size != dir_rec->isize)
2647                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2648
2649         return 0;
2650 }
2651
2652 static int delete_dir_index(struct btrfs_root *root,
2653                             struct inode_backref *backref)
2654 {
2655         struct btrfs_trans_handle *trans;
2656         struct btrfs_dir_item *di;
2657         struct btrfs_path path;
2658         int ret = 0;
2659
2660         trans = btrfs_start_transaction(root, 1);
2661         if (IS_ERR(trans))
2662                 return PTR_ERR(trans);
2663
2664         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2665                 (unsigned long long)backref->dir,
2666                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2667                 (unsigned long long)root->objectid);
2668
2669         btrfs_init_path(&path);
2670         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2671                                     backref->name, backref->namelen,
2672                                     backref->index, -1);
2673         if (IS_ERR(di)) {
2674                 ret = PTR_ERR(di);
2675                 btrfs_release_path(&path);
2676                 btrfs_commit_transaction(trans, root);
2677                 if (ret == -ENOENT)
2678                         return 0;
2679                 return ret;
2680         }
2681
2682         if (!di)
2683                 ret = btrfs_del_item(trans, root, &path);
2684         else
2685                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2686         BUG_ON(ret);
2687         btrfs_release_path(&path);
2688         btrfs_commit_transaction(trans, root);
2689         return ret;
2690 }
2691
2692 static int create_inode_item(struct btrfs_root *root,
2693                              struct inode_record *rec,
2694                              int root_dir)
2695 {
2696         struct btrfs_trans_handle *trans;
2697         struct btrfs_inode_item inode_item;
2698         time_t now = time(NULL);
2699         int ret;
2700
2701         trans = btrfs_start_transaction(root, 1);
2702         if (IS_ERR(trans)) {
2703                 ret = PTR_ERR(trans);
2704                 return ret;
2705         }
2706
2707         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2708                 "be incomplete, please check permissions and content after "
2709                 "the fsck completes.\n", (unsigned long long)root->objectid,
2710                 (unsigned long long)rec->ino);
2711
2712         memset(&inode_item, 0, sizeof(inode_item));
2713         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2714         if (root_dir)
2715                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2716         else
2717                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2718         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2719         if (rec->found_dir_item) {
2720                 if (rec->found_file_extent)
2721                         fprintf(stderr, "root %llu inode %llu has both a dir "
2722                                 "item and extents, unsure if it is a dir or a "
2723                                 "regular file so setting it as a directory\n",
2724                                 (unsigned long long)root->objectid,
2725                                 (unsigned long long)rec->ino);
2726                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2727                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2728         } else if (!rec->found_dir_item) {
2729                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2730                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2731         }
2732         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2733         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2734         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2735         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2736         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2737         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2738         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2739         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2740
2741         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2742         BUG_ON(ret);
2743         btrfs_commit_transaction(trans, root);
2744         return 0;
2745 }
2746
2747 static int repair_inode_backrefs(struct btrfs_root *root,
2748                                  struct inode_record *rec,
2749                                  struct cache_tree *inode_cache,
2750                                  int delete)
2751 {
2752         struct inode_backref *tmp, *backref;
2753         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2754         int ret = 0;
2755         int repaired = 0;
2756
2757         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2758                 if (!delete && rec->ino == root_dirid) {
2759                         if (!rec->found_inode_item) {
2760                                 ret = create_inode_item(root, rec, 1);
2761                                 if (ret)
2762                                         break;
2763                                 repaired++;
2764                         }
2765                 }
2766
2767                 /* Index 0 for root dir's are special, don't mess with it */
2768                 if (rec->ino == root_dirid && backref->index == 0)
2769                         continue;
2770
2771                 if (delete &&
2772                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2773                      (backref->found_dir_index && backref->found_inode_ref &&
2774                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2775                         ret = delete_dir_index(root, backref);
2776                         if (ret)
2777                                 break;
2778                         repaired++;
2779                         list_del(&backref->list);
2780                         free(backref);
2781                         continue;
2782                 }
2783
2784                 if (!delete && !backref->found_dir_index &&
2785                     backref->found_dir_item && backref->found_inode_ref) {
2786                         ret = add_missing_dir_index(root, inode_cache, rec,
2787                                                     backref);
2788                         if (ret)
2789                                 break;
2790                         repaired++;
2791                         if (backref->found_dir_item &&
2792                             backref->found_dir_index) {
2793                                 if (!backref->errors &&
2794                                     backref->found_inode_ref) {
2795                                         list_del(&backref->list);
2796                                         free(backref);
2797                                         continue;
2798                                 }
2799                         }
2800                 }
2801
2802                 if (!delete && (!backref->found_dir_index &&
2803                                 !backref->found_dir_item &&
2804                                 backref->found_inode_ref)) {
2805                         struct btrfs_trans_handle *trans;
2806                         struct btrfs_key location;
2807
2808                         ret = check_dir_conflict(root, backref->name,
2809                                                  backref->namelen,
2810                                                  backref->dir,
2811                                                  backref->index);
2812                         if (ret) {
2813                                 /*
2814                                  * let nlink fixing routine to handle it,
2815                                  * which can do it better.
2816                                  */
2817                                 ret = 0;
2818                                 break;
2819                         }
2820                         location.objectid = rec->ino;
2821                         location.type = BTRFS_INODE_ITEM_KEY;
2822                         location.offset = 0;
2823
2824                         trans = btrfs_start_transaction(root, 1);
2825                         if (IS_ERR(trans)) {
2826                                 ret = PTR_ERR(trans);
2827                                 break;
2828                         }
2829                         fprintf(stderr, "adding missing dir index/item pair "
2830                                 "for inode %llu\n",
2831                                 (unsigned long long)rec->ino);
2832                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2833                                                     backref->namelen,
2834                                                     backref->dir, &location,
2835                                                     imode_to_type(rec->imode),
2836                                                     backref->index);
2837                         BUG_ON(ret);
2838                         btrfs_commit_transaction(trans, root);
2839                         repaired++;
2840                 }
2841
2842                 if (!delete && (backref->found_inode_ref &&
2843                                 backref->found_dir_index &&
2844                                 backref->found_dir_item &&
2845                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2846                                 !rec->found_inode_item)) {
2847                         ret = create_inode_item(root, rec, 0);
2848                         if (ret)
2849                                 break;
2850                         repaired++;
2851                 }
2852
2853         }
2854         return ret ? ret : repaired;
2855 }
2856
2857 /*
2858  * To determine the file type for nlink/inode_item repair
2859  *
2860  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2861  * Return -ENOENT if file type is not found.
2862  */
2863 static int find_file_type(struct inode_record *rec, u8 *type)
2864 {
2865         struct inode_backref *backref;
2866
2867         /* For inode item recovered case */
2868         if (rec->found_inode_item) {
2869                 *type = imode_to_type(rec->imode);
2870                 return 0;
2871         }
2872
2873         list_for_each_entry(backref, &rec->backrefs, list) {
2874                 if (backref->found_dir_index || backref->found_dir_item) {
2875                         *type = backref->filetype;
2876                         return 0;
2877                 }
2878         }
2879         return -ENOENT;
2880 }
2881
2882 /*
2883  * To determine the file name for nlink repair
2884  *
2885  * Return 0 if file name is found, set name and namelen.
2886  * Return -ENOENT if file name is not found.
2887  */
2888 static int find_file_name(struct inode_record *rec,
2889                           char *name, int *namelen)
2890 {
2891         struct inode_backref *backref;
2892
2893         list_for_each_entry(backref, &rec->backrefs, list) {
2894                 if (backref->found_dir_index || backref->found_dir_item ||
2895                     backref->found_inode_ref) {
2896                         memcpy(name, backref->name, backref->namelen);
2897                         *namelen = backref->namelen;
2898                         return 0;
2899                 }
2900         }
2901         return -ENOENT;
2902 }
2903
2904 /* Reset the nlink of the inode to the correct one */
2905 static int reset_nlink(struct btrfs_trans_handle *trans,
2906                        struct btrfs_root *root,
2907                        struct btrfs_path *path,
2908                        struct inode_record *rec)
2909 {
2910         struct inode_backref *backref;
2911         struct inode_backref *tmp;
2912         struct btrfs_key key;
2913         struct btrfs_inode_item *inode_item;
2914         int ret = 0;
2915
2916         /* We don't believe this either, reset it and iterate backref */
2917         rec->found_link = 0;
2918
2919         /* Remove all backref including the valid ones */
2920         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2921                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2922                                    backref->index, backref->name,
2923                                    backref->namelen, 0);
2924                 if (ret < 0)
2925                         goto out;
2926
2927                 /* remove invalid backref, so it won't be added back */
2928                 if (!(backref->found_dir_index &&
2929                       backref->found_dir_item &&
2930                       backref->found_inode_ref)) {
2931                         list_del(&backref->list);
2932                         free(backref);
2933                 } else {
2934                         rec->found_link++;
2935                 }
2936         }
2937
2938         /* Set nlink to 0 */
2939         key.objectid = rec->ino;
2940         key.type = BTRFS_INODE_ITEM_KEY;
2941         key.offset = 0;
2942         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2943         if (ret < 0)
2944                 goto out;
2945         if (ret > 0) {
2946                 ret = -ENOENT;
2947                 goto out;
2948         }
2949         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2950                                     struct btrfs_inode_item);
2951         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2952         btrfs_mark_buffer_dirty(path->nodes[0]);
2953         btrfs_release_path(path);
2954
2955         /*
2956          * Add back valid inode_ref/dir_item/dir_index,
2957          * add_link() will handle the nlink inc, so new nlink must be correct
2958          */
2959         list_for_each_entry(backref, &rec->backrefs, list) {
2960                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2961                                      backref->name, backref->namelen,
2962                                      backref->filetype, &backref->index, 1);
2963                 if (ret < 0)
2964                         goto out;
2965         }
2966 out:
2967         btrfs_release_path(path);
2968         return ret;
2969 }
2970
2971 static int get_highest_inode(struct btrfs_trans_handle *trans,
2972                                 struct btrfs_root *root,
2973                                 struct btrfs_path *path,
2974                                 u64 *highest_ino)
2975 {
2976         struct btrfs_key key, found_key;
2977         int ret;
2978
2979         btrfs_init_path(path);
2980         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2981         key.offset = -1;
2982         key.type = BTRFS_INODE_ITEM_KEY;
2983         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2984         if (ret == 1) {
2985                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2986                                 path->slots[0] - 1);
2987                 *highest_ino = found_key.objectid;
2988                 ret = 0;
2989         }
2990         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2991                 ret = -EOVERFLOW;
2992         btrfs_release_path(path);
2993         return ret;
2994 }
2995
2996 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2997                                struct btrfs_root *root,
2998                                struct btrfs_path *path,
2999                                struct inode_record *rec)
3000 {
3001         char *dir_name = "lost+found";
3002         char namebuf[BTRFS_NAME_LEN] = {0};
3003         u64 lost_found_ino;
3004         u32 mode = 0700;
3005         u8 type = 0;
3006         int namelen = 0;
3007         int name_recovered = 0;
3008         int type_recovered = 0;
3009         int ret = 0;
3010
3011         /*
3012          * Get file name and type first before these invalid inode ref
3013          * are deleted by remove_all_invalid_backref()
3014          */
3015         name_recovered = !find_file_name(rec, namebuf, &namelen);
3016         type_recovered = !find_file_type(rec, &type);
3017
3018         if (!name_recovered) {
3019                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3020                        rec->ino, rec->ino);
3021                 namelen = count_digits(rec->ino);
3022                 sprintf(namebuf, "%llu", rec->ino);
3023                 name_recovered = 1;
3024         }
3025         if (!type_recovered) {
3026                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3027                        rec->ino);
3028                 type = BTRFS_FT_REG_FILE;
3029                 type_recovered = 1;
3030         }
3031
3032         ret = reset_nlink(trans, root, path, rec);
3033         if (ret < 0) {
3034                 fprintf(stderr,
3035                         "Failed to reset nlink for inode %llu: %s\n",
3036                         rec->ino, strerror(-ret));
3037                 goto out;
3038         }
3039
3040         if (rec->found_link == 0) {
3041                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3042                 if (ret < 0)
3043                         goto out;
3044                 lost_found_ino++;
3045                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3046                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3047                                   mode);
3048                 if (ret < 0) {
3049                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
3050                                 dir_name, strerror(-ret));
3051                         goto out;
3052                 }
3053                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3054                                      namebuf, namelen, type, NULL, 1);
3055                 /*
3056                  * Add ".INO" suffix several times to handle case where
3057                  * "FILENAME.INO" is already taken by another file.
3058                  */
3059                 while (ret == -EEXIST) {
3060                         /*
3061                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
3062                          */
3063                         if (namelen + count_digits(rec->ino) + 1 >
3064                             BTRFS_NAME_LEN) {
3065                                 ret = -EFBIG;
3066                                 goto out;
3067                         }
3068                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3069                                  ".%llu", rec->ino);
3070                         namelen += count_digits(rec->ino) + 1;
3071                         ret = btrfs_add_link(trans, root, rec->ino,
3072                                              lost_found_ino, namebuf,
3073                                              namelen, type, NULL, 1);
3074                 }
3075                 if (ret < 0) {
3076                         fprintf(stderr,
3077                                 "Failed to link the inode %llu to %s dir: %s\n",
3078                                 rec->ino, dir_name, strerror(-ret));
3079                         goto out;
3080                 }
3081                 /*
3082                  * Just increase the found_link, don't actually add the
3083                  * backref. This will make things easier and this inode
3084                  * record will be freed after the repair is done.
3085                  * So fsck will not report problem about this inode.
3086                  */
3087                 rec->found_link++;
3088                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3089                        namelen, namebuf, dir_name);
3090         }
3091         printf("Fixed the nlink of inode %llu\n", rec->ino);
3092 out:
3093         /*
3094          * Clear the flag anyway, or we will loop forever for the same inode
3095          * as it will not be removed from the bad inode list and the dead loop
3096          * happens.
3097          */
3098         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3099         btrfs_release_path(path);
3100         return ret;
3101 }
3102
3103 /*
3104  * Check if there is any normal(reg or prealloc) file extent for given
3105  * ino.
3106  * This is used to determine the file type when neither its dir_index/item or
3107  * inode_item exists.
3108  *
3109  * This will *NOT* report error, if any error happens, just consider it does
3110  * not have any normal file extent.
3111  */
3112 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3113 {
3114         struct btrfs_path path;
3115         struct btrfs_key key;
3116         struct btrfs_key found_key;
3117         struct btrfs_file_extent_item *fi;
3118         u8 type;
3119         int ret = 0;
3120
3121         btrfs_init_path(&path);
3122         key.objectid = ino;
3123         key.type = BTRFS_EXTENT_DATA_KEY;
3124         key.offset = 0;
3125
3126         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3127         if (ret < 0) {
3128                 ret = 0;
3129                 goto out;
3130         }
3131         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3132                 ret = btrfs_next_leaf(root, &path);
3133                 if (ret) {
3134                         ret = 0;
3135                         goto out;
3136                 }
3137         }
3138         while (1) {
3139                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3140                                       path.slots[0]);
3141                 if (found_key.objectid != ino ||
3142                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3143                         break;
3144                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3145                                     struct btrfs_file_extent_item);
3146                 type = btrfs_file_extent_type(path.nodes[0], fi);
3147                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3148                         ret = 1;
3149                         goto out;
3150                 }
3151         }
3152 out:
3153         btrfs_release_path(&path);
3154         return ret;
3155 }
3156
3157 static u32 btrfs_type_to_imode(u8 type)
3158 {
3159         static u32 imode_by_btrfs_type[] = {
3160                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3161                 [BTRFS_FT_DIR]          = S_IFDIR,
3162                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3163                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3164                 [BTRFS_FT_FIFO]         = S_IFIFO,
3165                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3166                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3167         };
3168
3169         return imode_by_btrfs_type[(type)];
3170 }
3171
3172 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3173                                 struct btrfs_root *root,
3174                                 struct btrfs_path *path,
3175                                 struct inode_record *rec)
3176 {
3177         u8 filetype;
3178         u32 mode = 0700;
3179         int type_recovered = 0;
3180         int ret = 0;
3181
3182         printf("Trying to rebuild inode:%llu\n", rec->ino);
3183
3184         type_recovered = !find_file_type(rec, &filetype);
3185
3186         /*
3187          * Try to determine inode type if type not found.
3188          *
3189          * For found regular file extent, it must be FILE.
3190          * For found dir_item/index, it must be DIR.
3191          *
3192          * For undetermined one, use FILE as fallback.
3193          *
3194          * TODO:
3195          * 1. If found backref(inode_index/item is already handled) to it,
3196          *    it must be DIR.
3197          *    Need new inode-inode ref structure to allow search for that.
3198          */
3199         if (!type_recovered) {
3200                 if (rec->found_file_extent &&
3201                     find_normal_file_extent(root, rec->ino)) {
3202                         type_recovered = 1;
3203                         filetype = BTRFS_FT_REG_FILE;
3204                 } else if (rec->found_dir_item) {
3205                         type_recovered = 1;
3206                         filetype = BTRFS_FT_DIR;
3207                 } else if (!list_empty(&rec->orphan_extents)) {
3208                         type_recovered = 1;
3209                         filetype = BTRFS_FT_REG_FILE;
3210                 } else{
3211                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3212                                rec->ino);
3213                         type_recovered = 1;
3214                         filetype = BTRFS_FT_REG_FILE;
3215                 }
3216         }
3217
3218         ret = btrfs_new_inode(trans, root, rec->ino,
3219                               mode | btrfs_type_to_imode(filetype));
3220         if (ret < 0)
3221                 goto out;
3222
3223         /*
3224          * Here inode rebuild is done, we only rebuild the inode item,
3225          * don't repair the nlink(like move to lost+found).
3226          * That is the job of nlink repair.
3227          *
3228          * We just fill the record and return
3229          */
3230         rec->found_dir_item = 1;
3231         rec->imode = mode | btrfs_type_to_imode(filetype);
3232         rec->nlink = 0;
3233         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3234         /* Ensure the inode_nlinks repair function will be called */
3235         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3236 out:
3237         return ret;
3238 }
3239
3240 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3241                                       struct btrfs_root *root,
3242                                       struct btrfs_path *path,
3243                                       struct inode_record *rec)
3244 {
3245         struct orphan_data_extent *orphan;
3246         struct orphan_data_extent *tmp;
3247         int ret = 0;
3248
3249         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3250                 /*
3251                  * Check for conflicting file extents
3252                  *
3253                  * Here we don't know whether the extents is compressed or not,
3254                  * so we can only assume it not compressed nor data offset,
3255                  * and use its disk_len as extent length.
3256                  */
3257                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3258                                        orphan->offset, orphan->disk_len, 0);
3259                 btrfs_release_path(path);
3260                 if (ret < 0)
3261                         goto out;
3262                 if (!ret) {
3263                         fprintf(stderr,
3264                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3265                                 orphan->disk_bytenr, orphan->disk_len);
3266                         ret = btrfs_free_extent(trans,
3267                                         root->fs_info->extent_root,
3268                                         orphan->disk_bytenr, orphan->disk_len,
3269                                         0, root->objectid, orphan->objectid,
3270                                         orphan->offset);
3271                         if (ret < 0)
3272                                 goto out;
3273                 }
3274                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3275                                 orphan->offset, orphan->disk_bytenr,
3276                                 orphan->disk_len, orphan->disk_len);
3277                 if (ret < 0)
3278                         goto out;
3279
3280                 /* Update file size info */
3281                 rec->found_size += orphan->disk_len;
3282                 if (rec->found_size == rec->nbytes)
3283                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3284
3285                 /* Update the file extent hole info too */
3286                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3287                                            orphan->disk_len);
3288                 if (ret < 0)
3289                         goto out;
3290                 if (RB_EMPTY_ROOT(&rec->holes))
3291                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3292
3293                 list_del(&orphan->list);
3294                 free(orphan);
3295         }
3296         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3297 out:
3298         return ret;
3299 }
3300
3301 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3302                                         struct btrfs_root *root,
3303                                         struct btrfs_path *path,
3304                                         struct inode_record *rec)
3305 {
3306         struct rb_node *node;
3307         struct file_extent_hole *hole;
3308         int found = 0;
3309         int ret = 0;
3310
3311         node = rb_first(&rec->holes);
3312
3313         while (node) {
3314                 found = 1;
3315                 hole = rb_entry(node, struct file_extent_hole, node);
3316                 ret = btrfs_punch_hole(trans, root, rec->ino,
3317                                        hole->start, hole->len);
3318                 if (ret < 0)
3319                         goto out;
3320                 ret = del_file_extent_hole(&rec->holes, hole->start,
3321                                            hole->len);
3322                 if (ret < 0)
3323                         goto out;
3324                 if (RB_EMPTY_ROOT(&rec->holes))
3325                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3326                 node = rb_first(&rec->holes);
3327         }
3328         /* special case for a file losing all its file extent */
3329         if (!found) {
3330                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3331                                        round_up(rec->isize,
3332                                                 root->fs_info->sectorsize));
3333                 if (ret < 0)
3334                         goto out;
3335         }
3336         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3337                rec->ino, root->objectid);
3338 out:
3339         return ret;
3340 }
3341
3342 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3343 {
3344         struct btrfs_trans_handle *trans;
3345         struct btrfs_path path;
3346         int ret = 0;
3347
3348         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3349                              I_ERR_NO_ORPHAN_ITEM |
3350                              I_ERR_LINK_COUNT_WRONG |
3351                              I_ERR_NO_INODE_ITEM |
3352                              I_ERR_FILE_EXTENT_ORPHAN |
3353                              I_ERR_FILE_EXTENT_DISCOUNT|
3354                              I_ERR_FILE_NBYTES_WRONG)))
3355                 return rec->errors;
3356
3357         /*
3358          * For nlink repair, it may create a dir and add link, so
3359          * 2 for parent(256)'s dir_index and dir_item
3360          * 2 for lost+found dir's inode_item and inode_ref
3361          * 1 for the new inode_ref of the file
3362          * 2 for lost+found dir's dir_index and dir_item for the file
3363          */
3364         trans = btrfs_start_transaction(root, 7);
3365         if (IS_ERR(trans))
3366                 return PTR_ERR(trans);
3367
3368         btrfs_init_path(&path);
3369         if (rec->errors & I_ERR_NO_INODE_ITEM)
3370                 ret = repair_inode_no_item(trans, root, &path, rec);
3371         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3372                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3373         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3374                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3375         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3376                 ret = repair_inode_isize(trans, root, &path, rec);
3377         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3378                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3379         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3380                 ret = repair_inode_nlinks(trans, root, &path, rec);
3381         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3382                 ret = repair_inode_nbytes(trans, root, &path, rec);
3383         btrfs_commit_transaction(trans, root);
3384         btrfs_release_path(&path);
3385         return ret;
3386 }
3387
3388 static int check_inode_recs(struct btrfs_root *root,
3389                             struct cache_tree *inode_cache)
3390 {
3391         struct cache_extent *cache;
3392         struct ptr_node *node;
3393         struct inode_record *rec;
3394         struct inode_backref *backref;
3395         int stage = 0;
3396         int ret = 0;
3397         int err = 0;
3398         u64 error = 0;
3399         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3400
3401         if (btrfs_root_refs(&root->root_item) == 0) {
3402                 if (!cache_tree_empty(inode_cache))
3403                         fprintf(stderr, "warning line %d\n", __LINE__);
3404                 return 0;
3405         }
3406
3407         /*
3408          * We need to repair backrefs first because we could change some of the
3409          * errors in the inode recs.
3410          *
3411          * We also need to go through and delete invalid backrefs first and then
3412          * add the correct ones second.  We do this because we may get EEXIST
3413          * when adding back the correct index because we hadn't yet deleted the
3414          * invalid index.
3415          *
3416          * For example, if we were missing a dir index then the directories
3417          * isize would be wrong, so if we fixed the isize to what we thought it
3418          * would be and then fixed the backref we'd still have a invalid fs, so
3419          * we need to add back the dir index and then check to see if the isize
3420          * is still wrong.
3421          */
3422         while (stage < 3) {
3423                 stage++;
3424                 if (stage == 3 && !err)
3425                         break;
3426
3427                 cache = search_cache_extent(inode_cache, 0);
3428                 while (repair && cache) {
3429                         node = container_of(cache, struct ptr_node, cache);
3430                         rec = node->data;
3431                         cache = next_cache_extent(cache);
3432
3433                         /* Need to free everything up and rescan */
3434                         if (stage == 3) {
3435                                 remove_cache_extent(inode_cache, &node->cache);
3436                                 free(node);
3437                                 free_inode_rec(rec);
3438                                 continue;
3439                         }
3440
3441                         if (list_empty(&rec->backrefs))
3442                                 continue;
3443
3444                         ret = repair_inode_backrefs(root, rec, inode_cache,
3445                                                     stage == 1);
3446                         if (ret < 0) {
3447                                 err = ret;
3448                                 stage = 2;
3449                                 break;
3450                         } if (ret > 0) {
3451                                 err = -EAGAIN;
3452                         }
3453                 }
3454         }
3455         if (err)
3456                 return err;
3457
3458         rec = get_inode_rec(inode_cache, root_dirid, 0);
3459         BUG_ON(IS_ERR(rec));
3460         if (rec) {
3461                 ret = check_root_dir(rec);
3462                 if (ret) {
3463                         fprintf(stderr, "root %llu root dir %llu error\n",
3464                                 (unsigned long long)root->root_key.objectid,
3465                                 (unsigned long long)root_dirid);
3466                         print_inode_error(root, rec);
3467                         error++;
3468                 }
3469         } else {
3470                 if (repair) {
3471                         struct btrfs_trans_handle *trans;
3472
3473                         trans = btrfs_start_transaction(root, 1);
3474                         if (IS_ERR(trans)) {
3475                                 err = PTR_ERR(trans);
3476                                 return err;
3477                         }
3478
3479                         fprintf(stderr,
3480                                 "root %llu missing its root dir, recreating\n",
3481                                 (unsigned long long)root->objectid);
3482
3483                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3484                         BUG_ON(ret);
3485
3486                         btrfs_commit_transaction(trans, root);
3487                         return -EAGAIN;
3488                 }
3489
3490                 fprintf(stderr, "root %llu root dir %llu not found\n",
3491                         (unsigned long long)root->root_key.objectid,
3492                         (unsigned long long)root_dirid);
3493         }
3494
3495         while (1) {
3496                 cache = search_cache_extent(inode_cache, 0);
3497                 if (!cache)
3498                         break;
3499                 node = container_of(cache, struct ptr_node, cache);
3500                 rec = node->data;
3501                 remove_cache_extent(inode_cache, &node->cache);
3502                 free(node);
3503                 if (rec->ino == root_dirid ||
3504                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3505                         free_inode_rec(rec);
3506                         continue;
3507                 }
3508
3509                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3510                         ret = check_orphan_item(root, rec->ino);
3511                         if (ret == 0)
3512                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3513                         if (can_free_inode_rec(rec)) {
3514                                 free_inode_rec(rec);
3515                                 continue;
3516                         }
3517                 }
3518
3519                 if (!rec->found_inode_item)
3520                         rec->errors |= I_ERR_NO_INODE_ITEM;
3521                 if (rec->found_link != rec->nlink)
3522                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3523                 if (repair) {
3524                         ret = try_repair_inode(root, rec);
3525                         if (ret == 0 && can_free_inode_rec(rec)) {
3526                                 free_inode_rec(rec);
3527                                 continue;
3528                         }
3529                         ret = 0;
3530                 }
3531
3532                 if (!(repair && ret == 0))
3533                         error++;
3534                 print_inode_error(root, rec);
3535                 list_for_each_entry(backref, &rec->backrefs, list) {
3536                         if (!backref->found_dir_item)
3537                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3538                         if (!backref->found_dir_index)
3539                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3540                         if (!backref->found_inode_ref)
3541                                 backref->errors |= REF_ERR_NO_INODE_REF;
3542                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3543                                 " namelen %u name %s filetype %d errors %x",
3544                                 (unsigned long long)backref->dir,
3545                                 (unsigned long long)backref->index,
3546                                 backref->namelen, backref->name,
3547                                 backref->filetype, backref->errors);
3548                         print_ref_error(backref->errors);
3549                 }
3550                 free_inode_rec(rec);
3551         }
3552         return (error > 0) ? -1 : 0;
3553 }
3554
3555 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3556                                         u64 objectid)
3557 {
3558         struct cache_extent *cache;
3559         struct root_record *rec = NULL;
3560         int ret;
3561
3562         cache = lookup_cache_extent(root_cache, objectid, 1);
3563         if (cache) {
3564                 rec = container_of(cache, struct root_record, cache);
3565         } else {
3566                 rec = calloc(1, sizeof(*rec));
3567                 if (!rec)
3568                         return ERR_PTR(-ENOMEM);
3569                 rec->objectid = objectid;
3570                 INIT_LIST_HEAD(&rec->backrefs);
3571                 rec->cache.start = objectid;
3572                 rec->cache.size = 1;
3573
3574                 ret = insert_cache_extent(root_cache, &rec->cache);
3575                 if (ret)
3576                         return ERR_PTR(-EEXIST);
3577         }
3578         return rec;
3579 }
3580
3581 static struct root_backref *get_root_backref(struct root_record *rec,
3582                                              u64 ref_root, u64 dir, u64 index,
3583                                              const char *name, int namelen)
3584 {
3585         struct root_backref *backref;
3586
3587         list_for_each_entry(backref, &rec->backrefs, list) {
3588                 if (backref->ref_root != ref_root || backref->dir != dir ||
3589                     backref->namelen != namelen)
3590                         continue;
3591                 if (memcmp(name, backref->name, namelen))
3592                         continue;
3593                 return backref;
3594         }
3595
3596         backref = calloc(1, sizeof(*backref) + namelen + 1);
3597         if (!backref)
3598                 return NULL;
3599         backref->ref_root = ref_root;
3600         backref->dir = dir;
3601         backref->index = index;
3602         backref->namelen = namelen;
3603         memcpy(backref->name, name, namelen);
3604         backref->name[namelen] = '\0';
3605         list_add_tail(&backref->list, &rec->backrefs);
3606         return backref;
3607 }
3608
3609 static void free_root_record(struct cache_extent *cache)
3610 {
3611         struct root_record *rec;
3612         struct root_backref *backref;
3613
3614         rec = container_of(cache, struct root_record, cache);
3615         while (!list_empty(&rec->backrefs)) {
3616                 backref = to_root_backref(rec->backrefs.next);
3617                 list_del(&backref->list);
3618                 free(backref);
3619         }
3620
3621         free(rec);
3622 }
3623
3624 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3625
3626 static int add_root_backref(struct cache_tree *root_cache,
3627                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3628                             const char *name, int namelen,
3629                             int item_type, int errors)
3630 {
3631         struct root_record *rec;
3632         struct root_backref *backref;
3633
3634         rec = get_root_rec(root_cache, root_id);
3635         BUG_ON(IS_ERR(rec));
3636         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3637         BUG_ON(!backref);
3638
3639         backref->errors |= errors;
3640
3641         if (item_type != BTRFS_DIR_ITEM_KEY) {
3642                 if (backref->found_dir_index || backref->found_back_ref ||
3643                     backref->found_forward_ref) {
3644                         if (backref->index != index)
3645                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3646                 } else {
3647                         backref->index = index;
3648                 }
3649         }
3650
3651         if (item_type == BTRFS_DIR_ITEM_KEY) {
3652                 if (backref->found_forward_ref)
3653                         rec->found_ref++;
3654                 backref->found_dir_item = 1;
3655         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3656                 backref->found_dir_index = 1;
3657         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3658                 if (backref->found_forward_ref)
3659                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3660                 else if (backref->found_dir_item)
3661                         rec->found_ref++;
3662                 backref->found_forward_ref = 1;
3663         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3664                 if (backref->found_back_ref)
3665                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3666                 backref->found_back_ref = 1;
3667         } else {
3668                 BUG_ON(1);
3669         }
3670
3671         if (backref->found_forward_ref && backref->found_dir_item)
3672                 backref->reachable = 1;
3673         return 0;
3674 }
3675
3676 static int merge_root_recs(struct btrfs_root *root,
3677                            struct cache_tree *src_cache,
3678                            struct cache_tree *dst_cache)
3679 {
3680         struct cache_extent *cache;
3681         struct ptr_node *node;
3682         struct inode_record *rec;
3683         struct inode_backref *backref;
3684         int ret = 0;
3685
3686         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3687                 free_inode_recs_tree(src_cache);
3688                 return 0;
3689         }
3690
3691         while (1) {
3692                 cache = search_cache_extent(src_cache, 0);
3693                 if (!cache)
3694                         break;
3695                 node = container_of(cache, struct ptr_node, cache);
3696                 rec = node->data;
3697                 remove_cache_extent(src_cache, &node->cache);
3698                 free(node);
3699
3700                 ret = is_child_root(root, root->objectid, rec->ino);
3701                 if (ret < 0)
3702                         break;
3703                 else if (ret == 0)
3704                         goto skip;
3705
3706                 list_for_each_entry(backref, &rec->backrefs, list) {
3707                         BUG_ON(backref->found_inode_ref);
3708                         if (backref->found_dir_item)
3709                                 add_root_backref(dst_cache, rec->ino,
3710                                         root->root_key.objectid, backref->dir,
3711                                         backref->index, backref->name,
3712                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3713                                         backref->errors);
3714                         if (backref->found_dir_index)
3715                                 add_root_backref(dst_cache, rec->ino,
3716                                         root->root_key.objectid, backref->dir,
3717                                         backref->index, backref->name,
3718                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3719                                         backref->errors);
3720                 }
3721 skip:
3722                 free_inode_rec(rec);
3723         }
3724         if (ret < 0)
3725                 return ret;
3726         return 0;
3727 }
3728
3729 static int check_root_refs(struct btrfs_root *root,
3730                            struct cache_tree *root_cache)
3731 {
3732         struct root_record *rec;
3733         struct root_record *ref_root;
3734         struct root_backref *backref;
3735         struct cache_extent *cache;
3736         int loop = 1;
3737         int ret;
3738         int error;
3739         int errors = 0;
3740
3741         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3742         BUG_ON(IS_ERR(rec));
3743         rec->found_ref = 1;
3744
3745         /* fixme: this can not detect circular references */
3746         while (loop) {
3747                 loop = 0;
3748                 cache = search_cache_extent(root_cache, 0);
3749                 while (1) {
3750                         if (!cache)
3751                                 break;
3752                         rec = container_of(cache, struct root_record, cache);
3753                         cache = next_cache_extent(cache);
3754
3755                         if (rec->found_ref == 0)
3756                                 continue;
3757
3758                         list_for_each_entry(backref, &rec->backrefs, list) {
3759                                 if (!backref->reachable)
3760                                         continue;
3761
3762                                 ref_root = get_root_rec(root_cache,
3763                                                         backref->ref_root);
3764                                 BUG_ON(IS_ERR(ref_root));
3765                                 if (ref_root->found_ref > 0)
3766                                         continue;
3767
3768                                 backref->reachable = 0;
3769                                 rec->found_ref--;
3770                                 if (rec->found_ref == 0)
3771                                         loop = 1;
3772                         }
3773                 }
3774         }
3775
3776         cache = search_cache_extent(root_cache, 0);
3777         while (1) {
3778                 if (!cache)
3779                         break;
3780                 rec = container_of(cache, struct root_record, cache);
3781                 cache = next_cache_extent(cache);
3782
3783                 if (rec->found_ref == 0 &&
3784                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3785                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3786                         ret = check_orphan_item(root->fs_info->tree_root,
3787                                                 rec->objectid);
3788                         if (ret == 0)
3789                                 continue;
3790
3791                         /*
3792                          * If we don't have a root item then we likely just have
3793                          * a dir item in a snapshot for this root but no actual
3794                          * ref key or anything so it's meaningless.
3795                          */
3796                         if (!rec->found_root_item)
3797                                 continue;
3798                         errors++;
3799                         fprintf(stderr, "fs tree %llu not referenced\n",
3800                                 (unsigned long long)rec->objectid);
3801                 }
3802
3803                 error = 0;
3804                 if (rec->found_ref > 0 && !rec->found_root_item)
3805                         error = 1;
3806                 list_for_each_entry(backref, &rec->backrefs, list) {
3807                         if (!backref->found_dir_item)
3808                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3809                         if (!backref->found_dir_index)
3810                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3811                         if (!backref->found_back_ref)
3812                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3813                         if (!backref->found_forward_ref)
3814                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3815                         if (backref->reachable && backref->errors)
3816                                 error = 1;
3817                 }
3818                 if (!error)
3819                         continue;
3820
3821                 errors++;
3822                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3823                         (unsigned long long)rec->objectid, rec->found_ref,
3824                          rec->found_root_item ? "" : "not found");
3825
3826                 list_for_each_entry(backref, &rec->backrefs, list) {
3827                         if (!backref->reachable)
3828                                 continue;
3829                         if (!backref->errors && rec->found_root_item)
3830                                 continue;
3831                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3832                                 " index %llu namelen %u name %s errors %x\n",
3833                                 (unsigned long long)backref->ref_root,
3834                                 (unsigned long long)backref->dir,
3835                                 (unsigned long long)backref->index,
3836                                 backref->namelen, backref->name,
3837                                 backref->errors);
3838                         print_ref_error(backref->errors);
3839                 }
3840         }
3841         return errors > 0 ? 1 : 0;
3842 }
3843
3844 static int process_root_ref(struct extent_buffer *eb, int slot,
3845                             struct btrfs_key *key,
3846                             struct cache_tree *root_cache)
3847 {
3848         u64 dirid;
3849         u64 index;
3850         u32 len;
3851         u32 name_len;
3852         struct btrfs_root_ref *ref;
3853         char namebuf[BTRFS_NAME_LEN];
3854         int error;
3855
3856         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3857
3858         dirid = btrfs_root_ref_dirid(eb, ref);
3859         index = btrfs_root_ref_sequence(eb, ref);
3860         name_len = btrfs_root_ref_name_len(eb, ref);
3861
3862         if (name_len <= BTRFS_NAME_LEN) {
3863                 len = name_len;
3864                 error = 0;
3865         } else {
3866                 len = BTRFS_NAME_LEN;
3867                 error = REF_ERR_NAME_TOO_LONG;
3868         }
3869         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3870
3871         if (key->type == BTRFS_ROOT_REF_KEY) {
3872                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3873                                  index, namebuf, len, key->type, error);
3874         } else {
3875                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3876                                  index, namebuf, len, key->type, error);
3877         }
3878         return 0;
3879 }
3880
3881 static void free_corrupt_block(struct cache_extent *cache)
3882 {
3883         struct btrfs_corrupt_block *corrupt;
3884
3885         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3886         free(corrupt);
3887 }
3888
3889 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3890
3891 /*
3892  * Repair the btree of the given root.
3893  *
3894  * The fix is to remove the node key in corrupt_blocks cache_tree.
3895  * and rebalance the tree.
3896  * After the fix, the btree should be writeable.
3897  */
3898 static int repair_btree(struct btrfs_root *root,
3899                         struct cache_tree *corrupt_blocks)
3900 {
3901         struct btrfs_trans_handle *trans;
3902         struct btrfs_path path;
3903         struct btrfs_corrupt_block *corrupt;
3904         struct cache_extent *cache;
3905         struct btrfs_key key;
3906         u64 offset;
3907         int level;
3908         int ret = 0;
3909
3910         if (cache_tree_empty(corrupt_blocks))
3911                 return 0;
3912
3913         trans = btrfs_start_transaction(root, 1);
3914         if (IS_ERR(trans)) {
3915                 ret = PTR_ERR(trans);
3916                 fprintf(stderr, "Error starting transaction: %s\n",
3917                         strerror(-ret));
3918                 return ret;
3919         }
3920         btrfs_init_path(&path);
3921         cache = first_cache_extent(corrupt_blocks);
3922         while (cache) {
3923                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3924                                        cache);
3925                 level = corrupt->level;
3926                 path.lowest_level = level;
3927                 key.objectid = corrupt->key.objectid;
3928                 key.type = corrupt->key.type;
3929                 key.offset = corrupt->key.offset;
3930
3931                 /*
3932                  * Here we don't want to do any tree balance, since it may
3933                  * cause a balance with corrupted brother leaf/node,
3934                  * so ins_len set to 0 here.
3935                  * Balance will be done after all corrupt node/leaf is deleted.
3936                  */
3937                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3938                 if (ret < 0)
3939                         goto out;
3940                 offset = btrfs_node_blockptr(path.nodes[level],
3941                                              path.slots[level]);
3942
3943                 /* Remove the ptr */
3944                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3945                 if (ret < 0)
3946                         goto out;
3947                 /*
3948                  * Remove the corresponding extent
3949                  * return value is not concerned.
3950                  */
3951                 btrfs_release_path(&path);
3952                 ret = btrfs_free_extent(trans, root, offset,
3953                                 root->fs_info->nodesize, 0,
3954                                 root->root_key.objectid, level - 1, 0);
3955                 cache = next_cache_extent(cache);
3956         }
3957
3958         /* Balance the btree using btrfs_search_slot() */
3959         cache = first_cache_extent(corrupt_blocks);
3960         while (cache) {
3961                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3962                                        cache);
3963                 memcpy(&key, &corrupt->key, sizeof(key));
3964                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3965                 if (ret < 0)
3966                         goto out;
3967                 /* return will always >0 since it won't find the item */
3968                 ret = 0;
3969                 btrfs_release_path(&path);
3970                 cache = next_cache_extent(cache);
3971         }
3972 out:
3973         btrfs_commit_transaction(trans, root);
3974         btrfs_release_path(&path);
3975         return ret;
3976 }
3977
3978 static int check_fs_root(struct btrfs_root *root,
3979                          struct cache_tree *root_cache,
3980                          struct walk_control *wc)
3981 {
3982         int ret = 0;
3983         int err = 0;
3984         int wret;
3985         int level;
3986         struct btrfs_path path;
3987         struct shared_node root_node;
3988         struct root_record *rec;
3989         struct btrfs_root_item *root_item = &root->root_item;
3990         struct cache_tree corrupt_blocks;
3991         struct orphan_data_extent *orphan;
3992         struct orphan_data_extent *tmp;
3993         enum btrfs_tree_block_status status;
3994         struct node_refs nrefs;
3995
3996         /*
3997          * Reuse the corrupt_block cache tree to record corrupted tree block
3998          *
3999          * Unlike the usage in extent tree check, here we do it in a per
4000          * fs/subvol tree base.
4001          */
4002         cache_tree_init(&corrupt_blocks);
4003         root->fs_info->corrupt_blocks = &corrupt_blocks;
4004
4005         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4006                 rec = get_root_rec(root_cache, root->root_key.objectid);
4007                 BUG_ON(IS_ERR(rec));
4008                 if (btrfs_root_refs(root_item) > 0)
4009                         rec->found_root_item = 1;
4010         }
4011
4012         btrfs_init_path(&path);
4013         memset(&root_node, 0, sizeof(root_node));
4014         cache_tree_init(&root_node.root_cache);
4015         cache_tree_init(&root_node.inode_cache);
4016         memset(&nrefs, 0, sizeof(nrefs));
4017
4018         /* Move the orphan extent record to corresponding inode_record */
4019         list_for_each_entry_safe(orphan, tmp,
4020                                  &root->orphan_data_extents, list) {
4021                 struct inode_record *inode;
4022
4023                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4024                                       1);
4025                 BUG_ON(IS_ERR(inode));
4026                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4027                 list_move(&orphan->list, &inode->orphan_extents);
4028         }
4029
4030         level = btrfs_header_level(root->node);
4031         memset(wc->nodes, 0, sizeof(wc->nodes));
4032         wc->nodes[level] = &root_node;
4033         wc->active_node = level;
4034         wc->root_level = level;
4035
4036         /* We may not have checked the root block, lets do that now */
4037         if (btrfs_is_leaf(root->node))
4038                 status = btrfs_check_leaf(root, NULL, root->node);
4039         else
4040                 status = btrfs_check_node(root, NULL, root->node);
4041         if (status != BTRFS_TREE_BLOCK_CLEAN)
4042                 return -EIO;
4043
4044         if (btrfs_root_refs(root_item) > 0 ||
4045             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4046                 path.nodes[level] = root->node;
4047                 extent_buffer_get(root->node);
4048                 path.slots[level] = 0;
4049         } else {
4050                 struct btrfs_key key;
4051                 struct btrfs_disk_key found_key;
4052
4053                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4054                 level = root_item->drop_level;
4055                 path.lowest_level = level;
4056                 if (level > btrfs_header_level(root->node) ||
4057                     level >= BTRFS_MAX_LEVEL) {
4058                         error("ignoring invalid drop level: %u", level);
4059                         goto skip_walking;
4060                 }
4061                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4062                 if (wret < 0)
4063                         goto skip_walking;
4064                 btrfs_node_key(path.nodes[level], &found_key,
4065                                 path.slots[level]);
4066                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4067                                         sizeof(found_key)));
4068         }
4069
4070         while (1) {
4071                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4072                 if (wret < 0)
4073                         ret = wret;
4074                 if (wret != 0)
4075                         break;
4076
4077                 wret = walk_up_tree(root, &path, wc, &level);
4078                 if (wret < 0)
4079                         ret = wret;
4080                 if (wret != 0)
4081                         break;
4082         }
4083 skip_walking:
4084         btrfs_release_path(&path);
4085
4086         if (!cache_tree_empty(&corrupt_blocks)) {
4087                 struct cache_extent *cache;
4088                 struct btrfs_corrupt_block *corrupt;
4089
4090                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4091                        root->root_key.objectid);
4092                 cache = first_cache_extent(&corrupt_blocks);
4093                 while (cache) {
4094                         corrupt = container_of(cache,
4095                                                struct btrfs_corrupt_block,
4096                                                cache);
4097                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4098                                cache->start, corrupt->level,
4099                                corrupt->key.objectid, corrupt->key.type,
4100                                corrupt->key.offset);
4101                         cache = next_cache_extent(cache);
4102                 }
4103                 if (repair) {
4104                         printf("Try to repair the btree for root %llu\n",
4105                                root->root_key.objectid);
4106                         ret = repair_btree(root, &corrupt_blocks);
4107                         if (ret < 0)
4108                                 fprintf(stderr, "Failed to repair btree: %s\n",
4109                                         strerror(-ret));
4110                         if (!ret)
4111                                 printf("Btree for root %llu is fixed\n",
4112                                        root->root_key.objectid);
4113                 }
4114         }
4115
4116         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4117         if (err < 0)
4118                 ret = err;
4119
4120         if (root_node.current) {
4121                 root_node.current->checked = 1;
4122                 maybe_free_inode_rec(&root_node.inode_cache,
4123                                 root_node.current);
4124         }
4125
4126         err = check_inode_recs(root, &root_node.inode_cache);
4127         if (!ret)
4128                 ret = err;
4129
4130         free_corrupt_blocks_tree(&corrupt_blocks);
4131         root->fs_info->corrupt_blocks = NULL;
4132         free_orphan_data_extents(&root->orphan_data_extents);
4133         return ret;
4134 }
4135
4136 static int fs_root_objectid(u64 objectid)
4137 {
4138         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4139             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4140                 return 1;
4141         return is_fstree(objectid);
4142 }
4143
4144 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4145                           struct cache_tree *root_cache)
4146 {
4147         struct btrfs_path path;
4148         struct btrfs_key key;
4149         struct walk_control wc;
4150         struct extent_buffer *leaf, *tree_node;
4151         struct btrfs_root *tmp_root;
4152         struct btrfs_root *tree_root = fs_info->tree_root;
4153         int ret;
4154         int err = 0;
4155
4156         if (ctx.progress_enabled) {
4157                 ctx.tp = TASK_FS_ROOTS;
4158                 task_start(ctx.info);
4159         }
4160
4161         /*
4162          * Just in case we made any changes to the extent tree that weren't
4163          * reflected into the free space cache yet.
4164          */
4165         if (repair)
4166                 reset_cached_block_groups(fs_info);
4167         memset(&wc, 0, sizeof(wc));
4168         cache_tree_init(&wc.shared);
4169         btrfs_init_path(&path);
4170
4171 again:
4172         key.offset = 0;
4173         key.objectid = 0;
4174         key.type = BTRFS_ROOT_ITEM_KEY;
4175         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4176         if (ret < 0) {
4177                 err = 1;
4178                 goto out;
4179         }
4180         tree_node = tree_root->node;
4181         while (1) {
4182                 if (tree_node != tree_root->node) {
4183                         free_root_recs_tree(root_cache);
4184                         btrfs_release_path(&path);
4185                         goto again;
4186                 }
4187                 leaf = path.nodes[0];
4188                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4189                         ret = btrfs_next_leaf(tree_root, &path);
4190                         if (ret) {
4191                                 if (ret < 0)
4192                                         err = 1;
4193                                 break;
4194                         }
4195                         leaf = path.nodes[0];
4196                 }
4197                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4198                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4199                     fs_root_objectid(key.objectid)) {
4200                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4201                                 tmp_root = btrfs_read_fs_root_no_cache(
4202                                                 fs_info, &key);
4203                         } else {
4204                                 key.offset = (u64)-1;
4205                                 tmp_root = btrfs_read_fs_root(
4206                                                 fs_info, &key);
4207                         }
4208                         if (IS_ERR(tmp_root)) {
4209                                 err = 1;
4210                                 goto next;
4211                         }
4212                         ret = check_fs_root(tmp_root, root_cache, &wc);
4213                         if (ret == -EAGAIN) {
4214                                 free_root_recs_tree(root_cache);
4215                                 btrfs_release_path(&path);
4216                                 goto again;
4217                         }
4218                         if (ret)
4219                                 err = 1;
4220                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4221                                 btrfs_free_fs_root(tmp_root);
4222                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4223                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4224                         process_root_ref(leaf, path.slots[0], &key,
4225                                          root_cache);
4226                 }
4227 next:
4228                 path.slots[0]++;
4229         }
4230 out:
4231         btrfs_release_path(&path);
4232         if (err)
4233                 free_extent_cache_tree(&wc.shared);
4234         if (!cache_tree_empty(&wc.shared))
4235                 fprintf(stderr, "warning line %d\n", __LINE__);
4236
4237         task_stop(ctx.info);
4238
4239         return err;
4240 }
4241
4242 /*
4243  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4244  * INODE_REF/INODE_EXTREF match.
4245  *
4246  * @root:       the root of the fs/file tree
4247  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4248  * @key:        the key of the DIR_ITEM/DIR_INDEX
4249  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4250  *              distinguish root_dir between normal dir/file
4251  * @name:       the name in the INODE_REF/INODE_EXTREF
4252  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4253  * @mode:       the st_mode of INODE_ITEM
4254  *
4255  * Return 0 if no error occurred.
4256  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4257  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4258  * dir/file.
4259  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4260  * not match for normal dir/file.
4261  */
4262 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4263                          struct btrfs_key *key, u64 index, char *name,
4264                          u32 namelen, u32 mode)
4265 {
4266         struct btrfs_path path;
4267         struct extent_buffer *node;
4268         struct btrfs_dir_item *di;
4269         struct btrfs_key location;
4270         char namebuf[BTRFS_NAME_LEN] = {0};
4271         u32 total;
4272         u32 cur = 0;
4273         u32 len;
4274         u32 name_len;
4275         u32 data_len;
4276         u8 filetype;
4277         int slot;
4278         int ret;
4279
4280         btrfs_init_path(&path);
4281         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4282         if (ret < 0) {
4283                 ret = DIR_ITEM_MISSING;
4284                 goto out;
4285         }
4286
4287         /* Process root dir and goto out*/
4288         if (index == 0) {
4289                 if (ret == 0) {
4290                         ret = ROOT_DIR_ERROR;
4291                         error(
4292                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4293                                 root->objectid,
4294                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4295                                         "REF" : "EXTREF",
4296                                 ref_key->objectid, ref_key->offset,
4297                                 key->type == BTRFS_DIR_ITEM_KEY ?
4298                                         "DIR_ITEM" : "DIR_INDEX");
4299                 } else {
4300                         ret = 0;
4301                 }
4302
4303                 goto out;
4304         }
4305
4306         /* Process normal file/dir */
4307         if (ret > 0) {
4308                 ret = DIR_ITEM_MISSING;
4309                 error(
4310                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4311                         root->objectid,
4312                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4313                         ref_key->objectid, ref_key->offset,
4314                         key->type == BTRFS_DIR_ITEM_KEY ?
4315                                 "DIR_ITEM" : "DIR_INDEX",
4316                         key->objectid, key->offset, namelen, name,
4317                         imode_to_type(mode));
4318                 goto out;
4319         }
4320
4321         /* Check whether inode_id/filetype/name match */
4322         node = path.nodes[0];
4323         slot = path.slots[0];
4324         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4325         total = btrfs_item_size_nr(node, slot);
4326         while (cur < total) {
4327                 ret = DIR_ITEM_MISMATCH;
4328                 name_len = btrfs_dir_name_len(node, di);
4329                 data_len = btrfs_dir_data_len(node, di);
4330
4331                 btrfs_dir_item_key_to_cpu(node, di, &location);
4332                 if (location.objectid != ref_key->objectid ||
4333                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4334                     location.offset != 0)
4335                         goto next;
4336
4337                 filetype = btrfs_dir_type(node, di);
4338                 if (imode_to_type(mode) != filetype)
4339                         goto next;
4340
4341                 if (cur + sizeof(*di) + name_len > total ||
4342                     name_len > BTRFS_NAME_LEN) {
4343                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4344                                 root->objectid,
4345                                 key->type == BTRFS_DIR_ITEM_KEY ?
4346                                 "DIR_ITEM" : "DIR_INDEX",
4347                                 key->objectid, key->offset, name_len);
4348
4349                         if (cur + sizeof(*di) > total)
4350                                 break;
4351                         len = min_t(u32, total - cur - sizeof(*di),
4352                                     BTRFS_NAME_LEN);
4353                 } else {
4354                         len = name_len;
4355                 }
4356
4357                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4358                 if (len != namelen || strncmp(namebuf, name, len))
4359                         goto next;
4360
4361                 ret = 0;
4362                 goto out;
4363 next:
4364                 len = sizeof(*di) + name_len + data_len;
4365                 di = (struct btrfs_dir_item *)((char *)di + len);
4366                 cur += len;
4367         }
4368         if (ret == DIR_ITEM_MISMATCH)
4369                 error(
4370                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4371                         root->objectid,
4372                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4373                         ref_key->objectid, ref_key->offset,
4374                         key->type == BTRFS_DIR_ITEM_KEY ?
4375                                 "DIR_ITEM" : "DIR_INDEX",
4376                         key->objectid, key->offset, namelen, name,
4377                         imode_to_type(mode));
4378 out:
4379         btrfs_release_path(&path);
4380         return ret;
4381 }
4382
4383 /*
4384  * Traverse the given INODE_REF and call find_dir_item() to find related
4385  * DIR_ITEM/DIR_INDEX.
4386  *
4387  * @root:       the root of the fs/file tree
4388  * @ref_key:    the key of the INODE_REF
4389  * @refs:       the count of INODE_REF
4390  * @mode:       the st_mode of INODE_ITEM
4391  *
4392  * Return 0 if no error occurred.
4393  */
4394 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4395                            struct extent_buffer *node, int slot, u64 *refs,
4396                            int mode)
4397 {
4398         struct btrfs_key key;
4399         struct btrfs_inode_ref *ref;
4400         char namebuf[BTRFS_NAME_LEN] = {0};
4401         u32 total;
4402         u32 cur = 0;
4403         u32 len;
4404         u32 name_len;
4405         u64 index;
4406         int ret, err = 0;
4407
4408         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4409         total = btrfs_item_size_nr(node, slot);
4410
4411 next:
4412         /* Update inode ref count */
4413         (*refs)++;
4414
4415         index = btrfs_inode_ref_index(node, ref);
4416         name_len = btrfs_inode_ref_name_len(node, ref);
4417         if (cur + sizeof(*ref) + name_len > total ||
4418             name_len > BTRFS_NAME_LEN) {
4419                 warning("root %llu INODE_REF[%llu %llu] name too long",
4420                         root->objectid, ref_key->objectid, ref_key->offset);
4421
4422                 if (total < cur + sizeof(*ref))
4423                         goto out;
4424                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4425         } else {
4426                 len = name_len;
4427         }
4428
4429         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4430
4431         /* Check root dir ref name */
4432         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4433                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4434                       root->objectid, ref_key->objectid, ref_key->offset,
4435                       namebuf);
4436                 err |= ROOT_DIR_ERROR;
4437         }
4438
4439         /* Find related DIR_INDEX */
4440         key.objectid = ref_key->offset;
4441         key.type = BTRFS_DIR_INDEX_KEY;
4442         key.offset = index;
4443         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4444         err |= ret;
4445
4446         /* Find related dir_item */
4447         key.objectid = ref_key->offset;
4448         key.type = BTRFS_DIR_ITEM_KEY;
4449         key.offset = btrfs_name_hash(namebuf, len);
4450         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4451         err |= ret;
4452
4453         len = sizeof(*ref) + name_len;
4454         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4455         cur += len;
4456         if (cur < total)
4457                 goto next;
4458
4459 out:
4460         return err;
4461 }
4462
4463 /*
4464  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4465  * DIR_ITEM/DIR_INDEX.
4466  *
4467  * @root:       the root of the fs/file tree
4468  * @ref_key:    the key of the INODE_EXTREF
4469  * @refs:       the count of INODE_EXTREF
4470  * @mode:       the st_mode of INODE_ITEM
4471  *
4472  * Return 0 if no error occurred.
4473  */
4474 static int check_inode_extref(struct btrfs_root *root,
4475                               struct btrfs_key *ref_key,
4476                               struct extent_buffer *node, int slot, u64 *refs,
4477                               int mode)
4478 {
4479         struct btrfs_key key;
4480         struct btrfs_inode_extref *extref;
4481         char namebuf[BTRFS_NAME_LEN] = {0};
4482         u32 total;
4483         u32 cur = 0;
4484         u32 len;
4485         u32 name_len;
4486         u64 index;
4487         u64 parent;
4488         int ret;
4489         int err = 0;
4490
4491         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4492         total = btrfs_item_size_nr(node, slot);
4493
4494 next:
4495         /* update inode ref count */
4496         (*refs)++;
4497         name_len = btrfs_inode_extref_name_len(node, extref);
4498         index = btrfs_inode_extref_index(node, extref);
4499         parent = btrfs_inode_extref_parent(node, extref);
4500         if (name_len <= BTRFS_NAME_LEN) {
4501                 len = name_len;
4502         } else {
4503                 len = BTRFS_NAME_LEN;
4504                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4505                         root->objectid, ref_key->objectid, ref_key->offset);
4506         }
4507         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4508
4509         /* Check root dir ref name */
4510         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4511                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4512                       root->objectid, ref_key->objectid, ref_key->offset,
4513                       namebuf);
4514                 err |= ROOT_DIR_ERROR;
4515         }
4516
4517         /* find related dir_index */
4518         key.objectid = parent;
4519         key.type = BTRFS_DIR_INDEX_KEY;
4520         key.offset = index;
4521         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4522         err |= ret;
4523
4524         /* find related dir_item */
4525         key.objectid = parent;
4526         key.type = BTRFS_DIR_ITEM_KEY;
4527         key.offset = btrfs_name_hash(namebuf, len);
4528         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4529         err |= ret;
4530
4531         len = sizeof(*extref) + name_len;
4532         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4533         cur += len;
4534
4535         if (cur < total)
4536                 goto next;
4537
4538         return err;
4539 }
4540
4541 /*
4542  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4543  * DIR_ITEM/DIR_INDEX match.
4544  *
4545  * @root:       the root of the fs/file tree
4546  * @key:        the key of the INODE_REF/INODE_EXTREF
4547  * @name:       the name in the INODE_REF/INODE_EXTREF
4548  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4549  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4550  * to (u64)-1
4551  * @ext_ref:    the EXTENDED_IREF feature
4552  *
4553  * Return 0 if no error occurred.
4554  * Return >0 for error bitmap
4555  */
4556 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4557                           char *name, int namelen, u64 index,
4558                           unsigned int ext_ref)
4559 {
4560         struct btrfs_path path;
4561         struct btrfs_inode_ref *ref;
4562         struct btrfs_inode_extref *extref;
4563         struct extent_buffer *node;
4564         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4565         u32 total;
4566         u32 cur = 0;
4567         u32 len;
4568         u32 ref_namelen;
4569         u64 ref_index;
4570         u64 parent;
4571         u64 dir_id;
4572         int slot;
4573         int ret;
4574
4575         btrfs_init_path(&path);
4576         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4577         if (ret) {
4578                 ret = INODE_REF_MISSING;
4579                 goto extref;
4580         }
4581
4582         node = path.nodes[0];
4583         slot = path.slots[0];
4584
4585         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4586         total = btrfs_item_size_nr(node, slot);
4587
4588         /* Iterate all entry of INODE_REF */
4589         while (cur < total) {
4590                 ret = INODE_REF_MISSING;
4591
4592                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4593                 ref_index = btrfs_inode_ref_index(node, ref);
4594                 if (index != (u64)-1 && index != ref_index)
4595                         goto next_ref;
4596
4597                 if (cur + sizeof(*ref) + ref_namelen > total ||
4598                     ref_namelen > BTRFS_NAME_LEN) {
4599                         warning("root %llu INODE %s[%llu %llu] name too long",
4600                                 root->objectid,
4601                                 key->type == BTRFS_INODE_REF_KEY ?
4602                                         "REF" : "EXTREF",
4603                                 key->objectid, key->offset);
4604
4605                         if (cur + sizeof(*ref) > total)
4606                                 break;
4607                         len = min_t(u32, total - cur - sizeof(*ref),
4608                                     BTRFS_NAME_LEN);
4609                 } else {
4610                         len = ref_namelen;
4611                 }
4612
4613                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4614                                    len);
4615
4616                 if (len != namelen || strncmp(ref_namebuf, name, len))
4617                         goto next_ref;
4618
4619                 ret = 0;
4620                 goto out;
4621 next_ref:
4622                 len = sizeof(*ref) + ref_namelen;
4623                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4624                 cur += len;
4625         }
4626
4627 extref:
4628         /* Skip if not support EXTENDED_IREF feature */
4629         if (!ext_ref)
4630                 goto out;
4631
4632         btrfs_release_path(&path);
4633         btrfs_init_path(&path);
4634
4635         dir_id = key->offset;
4636         key->type = BTRFS_INODE_EXTREF_KEY;
4637         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4638
4639         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4640         if (ret) {
4641                 ret = INODE_REF_MISSING;
4642                 goto out;
4643         }
4644
4645         node = path.nodes[0];
4646         slot = path.slots[0];
4647
4648         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4649         cur = 0;
4650         total = btrfs_item_size_nr(node, slot);
4651
4652         /* Iterate all entry of INODE_EXTREF */
4653         while (cur < total) {
4654                 ret = INODE_REF_MISSING;
4655
4656                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4657                 ref_index = btrfs_inode_extref_index(node, extref);
4658                 parent = btrfs_inode_extref_parent(node, extref);
4659                 if (index != (u64)-1 && index != ref_index)
4660                         goto next_extref;
4661
4662                 if (parent != dir_id)
4663                         goto next_extref;
4664
4665                 if (ref_namelen <= BTRFS_NAME_LEN) {
4666                         len = ref_namelen;
4667                 } else {
4668                         len = BTRFS_NAME_LEN;
4669                         warning("root %llu INODE %s[%llu %llu] name too long",
4670                                 root->objectid,
4671                                 key->type == BTRFS_INODE_REF_KEY ?
4672                                         "REF" : "EXTREF",
4673                                 key->objectid, key->offset);
4674                 }
4675                 read_extent_buffer(node, ref_namebuf,
4676                                    (unsigned long)(extref + 1), len);
4677
4678                 if (len != namelen || strncmp(ref_namebuf, name, len))
4679                         goto next_extref;
4680
4681                 ret = 0;
4682                 goto out;
4683
4684 next_extref:
4685                 len = sizeof(*extref) + ref_namelen;
4686                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4687                 cur += len;
4688
4689         }
4690 out:
4691         btrfs_release_path(&path);
4692         return ret;
4693 }
4694
4695 /*
4696  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4697  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4698  *
4699  * @root:       the root of the fs/file tree
4700  * @key:        the key of the INODE_REF/INODE_EXTREF
4701  * @size:       the st_size of the INODE_ITEM
4702  * @ext_ref:    the EXTENDED_IREF feature
4703  *
4704  * Return 0 if no error occurred.
4705  */
4706 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4707                           struct extent_buffer *node, int slot, u64 *size,
4708                           unsigned int ext_ref)
4709 {
4710         struct btrfs_dir_item *di;
4711         struct btrfs_inode_item *ii;
4712         struct btrfs_path path;
4713         struct btrfs_key location;
4714         char namebuf[BTRFS_NAME_LEN] = {0};
4715         u32 total;
4716         u32 cur = 0;
4717         u32 len;
4718         u32 name_len;
4719         u32 data_len;
4720         u8 filetype;
4721         u32 mode;
4722         u64 index;
4723         int ret;
4724         int err = 0;
4725
4726         /*
4727          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4728          * ignore index check.
4729          */
4730         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4731
4732         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4733         total = btrfs_item_size_nr(node, slot);
4734
4735         while (cur < total) {
4736                 data_len = btrfs_dir_data_len(node, di);
4737                 if (data_len)
4738                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4739                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4740                               "DIR_ITEM" : "DIR_INDEX",
4741                               key->objectid, key->offset, data_len);
4742
4743                 name_len = btrfs_dir_name_len(node, di);
4744                 if (cur + sizeof(*di) + name_len > total ||
4745                     name_len > BTRFS_NAME_LEN) {
4746                         warning("root %llu %s[%llu %llu] name too long",
4747                                 root->objectid,
4748                                 key->type == BTRFS_DIR_ITEM_KEY ?
4749                                 "DIR_ITEM" : "DIR_INDEX",
4750                                 key->objectid, key->offset);
4751
4752                         if (cur + sizeof(*di) > total)
4753                                 break;
4754                         len = min_t(u32, total - cur - sizeof(*di),
4755                                     BTRFS_NAME_LEN);
4756                 } else {
4757                         len = name_len;
4758                 }
4759                 (*size) += name_len;
4760
4761                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4762                 filetype = btrfs_dir_type(node, di);
4763
4764                 if (key->type == BTRFS_DIR_ITEM_KEY &&
4765                     key->offset != btrfs_name_hash(namebuf, len)) {
4766                         err |= -EIO;
4767                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4768                                 root->objectid, key->objectid, key->offset,
4769                                 namebuf, len, filetype, key->offset,
4770                                 btrfs_name_hash(namebuf, len));
4771                 }
4772
4773                 btrfs_init_path(&path);
4774                 btrfs_dir_item_key_to_cpu(node, di, &location);
4775
4776                 /* Ignore related ROOT_ITEM check */
4777                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4778                         goto next;
4779
4780                 /* Check relative INODE_ITEM(existence/filetype) */
4781                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4782                 if (ret) {
4783                         err |= INODE_ITEM_MISSING;
4784                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4785                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4786                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4787                               key->offset, location.objectid, name_len,
4788                               namebuf, filetype);
4789                         goto next;
4790                 }
4791
4792                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4793                                     struct btrfs_inode_item);
4794                 mode = btrfs_inode_mode(path.nodes[0], ii);
4795
4796                 if (imode_to_type(mode) != filetype) {
4797                         err |= INODE_ITEM_MISMATCH;
4798                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4799                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4800                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4801                               key->offset, name_len, namebuf, filetype);
4802                 }
4803
4804                 /* Check relative INODE_REF/INODE_EXTREF */
4805                 location.type = BTRFS_INODE_REF_KEY;
4806                 location.offset = key->objectid;
4807                 ret = find_inode_ref(root, &location, namebuf, len,
4808                                        index, ext_ref);
4809                 err |= ret;
4810                 if (ret & INODE_REF_MISSING)
4811                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4812                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4813                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4814                               key->offset, name_len, namebuf, filetype);
4815
4816 next:
4817                 btrfs_release_path(&path);
4818                 len = sizeof(*di) + name_len + data_len;
4819                 di = (struct btrfs_dir_item *)((char *)di + len);
4820                 cur += len;
4821
4822                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4823                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4824                               root->objectid, key->objectid, key->offset);
4825                         break;
4826                 }
4827         }
4828
4829         return err;
4830 }
4831
4832 /*
4833  * Check file extent datasum/hole, update the size of the file extents,
4834  * check and update the last offset of the file extent.
4835  *
4836  * @root:       the root of fs/file tree.
4837  * @fkey:       the key of the file extent.
4838  * @nodatasum:  INODE_NODATASUM feature.
4839  * @size:       the sum of all EXTENT_DATA items size for this inode.
4840  * @end:        the offset of the last extent.
4841  *
4842  * Return 0 if no error occurred.
4843  */
4844 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4845                              struct extent_buffer *node, int slot,
4846                              unsigned int nodatasum, u64 *size, u64 *end)
4847 {
4848         struct btrfs_file_extent_item *fi;
4849         u64 disk_bytenr;
4850         u64 disk_num_bytes;
4851         u64 extent_num_bytes;
4852         u64 extent_offset;
4853         u64 csum_found;         /* In byte size, sectorsize aligned */
4854         u64 search_start;       /* Logical range start we search for csum */
4855         u64 search_len;         /* Logical range len we search for csum */
4856         unsigned int extent_type;
4857         unsigned int is_hole;
4858         int compressed = 0;
4859         int ret;
4860         int err = 0;
4861
4862         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4863
4864         /* Check inline extent */
4865         extent_type = btrfs_file_extent_type(node, fi);
4866         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4867                 struct btrfs_item *e = btrfs_item_nr(slot);
4868                 u32 item_inline_len;
4869
4870                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4871                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4872                 compressed = btrfs_file_extent_compression(node, fi);
4873                 if (extent_num_bytes == 0) {
4874                         error(
4875                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4876                                 root->objectid, fkey->objectid, fkey->offset);
4877                         err |= FILE_EXTENT_ERROR;
4878                 }
4879                 if (!compressed && extent_num_bytes != item_inline_len) {
4880                         error(
4881                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4882                                 root->objectid, fkey->objectid, fkey->offset,
4883                                 extent_num_bytes, item_inline_len);
4884                         err |= FILE_EXTENT_ERROR;
4885                 }
4886                 *end += extent_num_bytes;
4887                 *size += extent_num_bytes;
4888                 return err;
4889         }
4890
4891         /* Check extent type */
4892         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4893                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4894                 err |= FILE_EXTENT_ERROR;
4895                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4896                       root->objectid, fkey->objectid, fkey->offset);
4897                 return err;
4898         }
4899
4900         /* Check REG_EXTENT/PREALLOC_EXTENT */
4901         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4902         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4903         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4904         extent_offset = btrfs_file_extent_offset(node, fi);
4905         compressed = btrfs_file_extent_compression(node, fi);
4906         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4907
4908         /*
4909          * Check EXTENT_DATA csum
4910          *
4911          * For plain (uncompressed) extent, we should only check the range
4912          * we're referring to, as it's possible that part of prealloc extent
4913          * has been written, and has csum:
4914          *
4915          * |<--- Original large preallocated extent A ---->|
4916          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4917          *      No csum                         Has csum
4918          *
4919          * For compressed extent, we should check the whole range.
4920          */
4921         if (!compressed) {
4922                 search_start = disk_bytenr + extent_offset;
4923                 search_len = extent_num_bytes;
4924         } else {
4925                 search_start = disk_bytenr;
4926                 search_len = disk_num_bytes;
4927         }
4928         ret = count_csum_range(root, search_start, search_len, &csum_found);
4929         if (csum_found > 0 && nodatasum) {
4930                 err |= ODD_CSUM_ITEM;
4931                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4932                       root->objectid, fkey->objectid, fkey->offset);
4933         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4934                    !is_hole && (ret < 0 || csum_found < search_len)) {
4935                 err |= CSUM_ITEM_MISSING;
4936                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4937                       root->objectid, fkey->objectid, fkey->offset,
4938                       csum_found, search_len);
4939         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4940                 err |= ODD_CSUM_ITEM;
4941                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4942                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4943         }
4944
4945         /* Check EXTENT_DATA hole */
4946         if (!no_holes && *end != fkey->offset) {
4947                 err |= FILE_EXTENT_ERROR;
4948                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4949                       root->objectid, fkey->objectid, fkey->offset);
4950         }
4951
4952         *end += extent_num_bytes;
4953         if (!is_hole)
4954                 *size += extent_num_bytes;
4955
4956         return err;
4957 }
4958
4959 /*
4960  * Set inode item nbytes to @nbytes
4961  *
4962  * Returns  0     on success
4963  * Returns  != 0  on error
4964  */
4965 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
4966                                       struct btrfs_path *path,
4967                                       u64 ino, u64 nbytes)
4968 {
4969         struct btrfs_trans_handle *trans;
4970         struct btrfs_inode_item *ii;
4971         struct btrfs_key key;
4972         struct btrfs_key research_key;
4973         int err = 0;
4974         int ret;
4975
4976         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
4977
4978         key.objectid = ino;
4979         key.type = BTRFS_INODE_ITEM_KEY;
4980         key.offset = 0;
4981
4982         trans = btrfs_start_transaction(root, 1);
4983         if (IS_ERR(trans)) {
4984                 ret = PTR_ERR(trans);
4985                 err |= ret;
4986                 goto out;
4987         }
4988
4989         btrfs_release_path(path);
4990         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4991         if (ret > 0)
4992                 ret = -ENOENT;
4993         if (ret) {
4994                 err |= ret;
4995                 goto fail;
4996         }
4997
4998         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
4999                             struct btrfs_inode_item);
5000         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5001         btrfs_mark_buffer_dirty(path->nodes[0]);
5002 fail:
5003         btrfs_commit_transaction(trans, root);
5004 out:
5005         if (ret)
5006                 error("failed to set nbytes in inode %llu root %llu",
5007                       ino, root->root_key.objectid);
5008         else
5009                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5010                        root->root_key.objectid, nbytes);
5011
5012         /* research path */
5013         btrfs_release_path(path);
5014         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5015         err |= ret;
5016
5017         return err;
5018 }
5019
5020 /*
5021  * Check INODE_ITEM and related ITEMs (the same inode number)
5022  * 1. check link count
5023  * 2. check inode ref/extref
5024  * 3. check dir item/index
5025  *
5026  * @ext_ref:    the EXTENDED_IREF feature
5027  *
5028  * Return 0 if no error occurred.
5029  * Return >0 for error or hit the traversal is done(by error bitmap)
5030  */
5031 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5032                             unsigned int ext_ref)
5033 {
5034         struct extent_buffer *node;
5035         struct btrfs_inode_item *ii;
5036         struct btrfs_key key;
5037         u64 inode_id;
5038         u32 mode;
5039         u64 nlink;
5040         u64 nbytes;
5041         u64 isize;
5042         u64 size = 0;
5043         u64 refs = 0;
5044         u64 extent_end = 0;
5045         u64 extent_size = 0;
5046         unsigned int dir;
5047         unsigned int nodatasum;
5048         int slot;
5049         int ret;
5050         int err = 0;
5051
5052         node = path->nodes[0];
5053         slot = path->slots[0];
5054
5055         btrfs_item_key_to_cpu(node, &key, slot);
5056         inode_id = key.objectid;
5057
5058         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5059                 ret = btrfs_next_item(root, path);
5060                 if (ret > 0)
5061                         err |= LAST_ITEM;
5062                 return err;
5063         }
5064
5065         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5066         isize = btrfs_inode_size(node, ii);
5067         nbytes = btrfs_inode_nbytes(node, ii);
5068         mode = btrfs_inode_mode(node, ii);
5069         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5070         nlink = btrfs_inode_nlink(node, ii);
5071         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5072
5073         while (1) {
5074                 ret = btrfs_next_item(root, path);
5075                 if (ret < 0) {
5076                         /* out will fill 'err' rusing current statistics */
5077                         goto out;
5078                 } else if (ret > 0) {
5079                         err |= LAST_ITEM;
5080                         goto out;
5081                 }
5082
5083                 node = path->nodes[0];
5084                 slot = path->slots[0];
5085                 btrfs_item_key_to_cpu(node, &key, slot);
5086                 if (key.objectid != inode_id)
5087                         goto out;
5088
5089                 switch (key.type) {
5090                 case BTRFS_INODE_REF_KEY:
5091                         ret = check_inode_ref(root, &key, node, slot, &refs,
5092                                               mode);
5093                         err |= ret;
5094                         break;
5095                 case BTRFS_INODE_EXTREF_KEY:
5096                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5097                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5098                                         root->objectid, key.objectid,
5099                                         key.offset);
5100                         ret = check_inode_extref(root, &key, node, slot, &refs,
5101                                                  mode);
5102                         err |= ret;
5103                         break;
5104                 case BTRFS_DIR_ITEM_KEY:
5105                 case BTRFS_DIR_INDEX_KEY:
5106                         if (!dir) {
5107                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5108                                         root->objectid, inode_id,
5109                                         imode_to_type(mode), key.objectid,
5110                                         key.offset);
5111                         }
5112                         ret = check_dir_item(root, &key, node, slot, &size,
5113                                              ext_ref);
5114                         err |= ret;
5115                         break;
5116                 case BTRFS_EXTENT_DATA_KEY:
5117                         if (dir) {
5118                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5119                                         root->objectid, inode_id, key.objectid,
5120                                         key.offset);
5121                         }
5122                         ret = check_file_extent(root, &key, node, slot,
5123                                                 nodatasum, &extent_size,
5124                                                 &extent_end);
5125                         err |= ret;
5126                         break;
5127                 case BTRFS_XATTR_ITEM_KEY:
5128                         break;
5129                 default:
5130                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5131                               key.objectid, key.type, key.offset);
5132                 }
5133         }
5134
5135 out:
5136         /* verify INODE_ITEM nlink/isize/nbytes */
5137         if (dir) {
5138                 if (nlink != 1) {
5139                         err |= LINK_COUNT_ERROR;
5140                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5141                               root->objectid, inode_id, nlink);
5142                 }
5143
5144                 /*
5145                  * Just a warning, as dir inode nbytes is just an
5146                  * instructive value.
5147                  */
5148                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5149                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5150                                 root->objectid, inode_id,
5151                                 root->fs_info->nodesize);
5152                 }
5153
5154                 if (isize != size) {
5155                         err |= ISIZE_ERROR;
5156                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5157                               root->objectid, inode_id, isize, size);
5158                 }
5159         } else {
5160                 if (nlink != refs) {
5161                         err |= LINK_COUNT_ERROR;
5162                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5163                               root->objectid, inode_id, nlink, refs);
5164                 } else if (!nlink) {
5165                         err |= ORPHAN_ITEM;
5166                 }
5167
5168                 if (!nbytes && !no_holes && extent_end < isize) {
5169                         err |= NBYTES_ERROR;
5170                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5171                               root->objectid, inode_id, isize);
5172                 }
5173
5174                 if (nbytes != extent_size) {
5175                         if (repair)
5176                                 ret = repair_inode_nbytes_lowmem(root, path,
5177                                                          inode_id, extent_size);
5178                         if (!repair || ret) {
5179                                 err |= NBYTES_ERROR;
5180                                 error(
5181         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5182                                       root->objectid, inode_id, nbytes,
5183                                       extent_size);
5184                         }
5185                 }
5186         }
5187
5188         return err;
5189 }
5190
5191 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5192 {
5193         struct btrfs_path path;
5194         struct btrfs_key key;
5195         int err = 0;
5196         int ret;
5197
5198         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5199         key.type = BTRFS_INODE_ITEM_KEY;
5200         key.offset = 0;
5201
5202         /* For root being dropped, we don't need to check first inode */
5203         if (btrfs_root_refs(&root->root_item) == 0 &&
5204             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5205             key.objectid)
5206                 return 0;
5207
5208         btrfs_init_path(&path);
5209
5210         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5211         if (ret < 0)
5212                 goto out;
5213         if (ret > 0) {
5214                 ret = 0;
5215                 err |= INODE_ITEM_MISSING;
5216                 error("first inode item of root %llu is missing",
5217                       root->objectid);
5218         }
5219
5220         err |= check_inode_item(root, &path, ext_ref);
5221         err &= ~LAST_ITEM;
5222         if (err && !ret)
5223                 ret = -EIO;
5224 out:
5225         btrfs_release_path(&path);
5226         return ret;
5227 }
5228
5229 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5230                                                 u64 parent, u64 root)
5231 {
5232         struct rb_node *node;
5233         struct tree_backref *back = NULL;
5234         struct tree_backref match = {
5235                 .node = {
5236                         .is_data = 0,
5237                 },
5238         };
5239
5240         if (parent) {
5241                 match.parent = parent;
5242                 match.node.full_backref = 1;
5243         } else {
5244                 match.root = root;
5245         }
5246
5247         node = rb_search(&rec->backref_tree, &match.node.node,
5248                          (rb_compare_keys)compare_extent_backref, NULL);
5249         if (node)
5250                 back = to_tree_backref(rb_node_to_extent_backref(node));
5251
5252         return back;
5253 }
5254
5255 static struct data_backref *find_data_backref(struct extent_record *rec,
5256                                                 u64 parent, u64 root,
5257                                                 u64 owner, u64 offset,
5258                                                 int found_ref,
5259                                                 u64 disk_bytenr, u64 bytes)
5260 {
5261         struct rb_node *node;
5262         struct data_backref *back = NULL;
5263         struct data_backref match = {
5264                 .node = {
5265                         .is_data = 1,
5266                 },
5267                 .owner = owner,
5268                 .offset = offset,
5269                 .bytes = bytes,
5270                 .found_ref = found_ref,
5271                 .disk_bytenr = disk_bytenr,
5272         };
5273
5274         if (parent) {
5275                 match.parent = parent;
5276                 match.node.full_backref = 1;
5277         } else {
5278                 match.root = root;
5279         }
5280
5281         node = rb_search(&rec->backref_tree, &match.node.node,
5282                          (rb_compare_keys)compare_extent_backref, NULL);
5283         if (node)
5284                 back = to_data_backref(rb_node_to_extent_backref(node));
5285
5286         return back;
5287 }
5288 /*
5289  * Iterate all item on the tree and call check_inode_item() to check.
5290  *
5291  * @root:       the root of the tree to be checked.
5292  * @ext_ref:    the EXTENDED_IREF feature
5293  *
5294  * Return 0 if no error found.
5295  * Return <0 for error.
5296  */
5297 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5298 {
5299         struct btrfs_path path;
5300         struct node_refs nrefs;
5301         struct btrfs_root_item *root_item = &root->root_item;
5302         int ret;
5303         int level;
5304         int err = 0;
5305
5306         /*
5307          * We need to manually check the first inode item(256)
5308          * As the following traversal function will only start from
5309          * the first inode item in the leaf, if inode item(256) is missing
5310          * we will just skip it forever.
5311          */
5312         ret = check_fs_first_inode(root, ext_ref);
5313         if (ret < 0)
5314                 return ret;
5315
5316         memset(&nrefs, 0, sizeof(nrefs));
5317         level = btrfs_header_level(root->node);
5318         btrfs_init_path(&path);
5319
5320         if (btrfs_root_refs(root_item) > 0 ||
5321             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5322                 path.nodes[level] = root->node;
5323                 path.slots[level] = 0;
5324                 extent_buffer_get(root->node);
5325         } else {
5326                 struct btrfs_key key;
5327
5328                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5329                 level = root_item->drop_level;
5330                 path.lowest_level = level;
5331                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5332                 if (ret < 0)
5333                         goto out;
5334                 ret = 0;
5335         }
5336
5337         while (1) {
5338                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5339                 err |= !!ret;
5340
5341                 /* if ret is negative, walk shall stop */
5342                 if (ret < 0) {
5343                         ret = err;
5344                         break;
5345                 }
5346
5347                 ret = walk_up_tree_v2(root, &path, &level);
5348                 if (ret != 0) {
5349                         /* Normal exit, reset ret to err */
5350                         ret = err;
5351                         break;
5352                 }
5353         }
5354
5355 out:
5356         btrfs_release_path(&path);
5357         return ret;
5358 }
5359
5360 /*
5361  * Find the relative ref for root_ref and root_backref.
5362  *
5363  * @root:       the root of the root tree.
5364  * @ref_key:    the key of the root ref.
5365  *
5366  * Return 0 if no error occurred.
5367  */
5368 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5369                           struct extent_buffer *node, int slot)
5370 {
5371         struct btrfs_path path;
5372         struct btrfs_key key;
5373         struct btrfs_root_ref *ref;
5374         struct btrfs_root_ref *backref;
5375         char ref_name[BTRFS_NAME_LEN] = {0};
5376         char backref_name[BTRFS_NAME_LEN] = {0};
5377         u64 ref_dirid;
5378         u64 ref_seq;
5379         u32 ref_namelen;
5380         u64 backref_dirid;
5381         u64 backref_seq;
5382         u32 backref_namelen;
5383         u32 len;
5384         int ret;
5385         int err = 0;
5386
5387         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5388         ref_dirid = btrfs_root_ref_dirid(node, ref);
5389         ref_seq = btrfs_root_ref_sequence(node, ref);
5390         ref_namelen = btrfs_root_ref_name_len(node, ref);
5391
5392         if (ref_namelen <= BTRFS_NAME_LEN) {
5393                 len = ref_namelen;
5394         } else {
5395                 len = BTRFS_NAME_LEN;
5396                 warning("%s[%llu %llu] ref_name too long",
5397                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5398                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5399                         ref_key->offset);
5400         }
5401         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5402
5403         /* Find relative root_ref */
5404         key.objectid = ref_key->offset;
5405         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5406         key.offset = ref_key->objectid;
5407
5408         btrfs_init_path(&path);
5409         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5410         if (ret) {
5411                 err |= ROOT_REF_MISSING;
5412                 error("%s[%llu %llu] couldn't find relative ref",
5413                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5414                       "ROOT_REF" : "ROOT_BACKREF",
5415                       ref_key->objectid, ref_key->offset);
5416                 goto out;
5417         }
5418
5419         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5420                                  struct btrfs_root_ref);
5421         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5422         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5423         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5424
5425         if (backref_namelen <= BTRFS_NAME_LEN) {
5426                 len = backref_namelen;
5427         } else {
5428                 len = BTRFS_NAME_LEN;
5429                 warning("%s[%llu %llu] ref_name too long",
5430                         key.type == BTRFS_ROOT_REF_KEY ?
5431                         "ROOT_REF" : "ROOT_BACKREF",
5432                         key.objectid, key.offset);
5433         }
5434         read_extent_buffer(path.nodes[0], backref_name,
5435                            (unsigned long)(backref + 1), len);
5436
5437         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5438             ref_namelen != backref_namelen ||
5439             strncmp(ref_name, backref_name, len)) {
5440                 err |= ROOT_REF_MISMATCH;
5441                 error("%s[%llu %llu] mismatch relative ref",
5442                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5443                       "ROOT_REF" : "ROOT_BACKREF",
5444                       ref_key->objectid, ref_key->offset);
5445         }
5446 out:
5447         btrfs_release_path(&path);
5448         return err;
5449 }
5450
5451 /*
5452  * Check all fs/file tree in low_memory mode.
5453  *
5454  * 1. for fs tree root item, call check_fs_root_v2()
5455  * 2. for fs tree root ref/backref, call check_root_ref()
5456  *
5457  * Return 0 if no error occurred.
5458  */
5459 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5460 {
5461         struct btrfs_root *tree_root = fs_info->tree_root;
5462         struct btrfs_root *cur_root = NULL;
5463         struct btrfs_path path;
5464         struct btrfs_key key;
5465         struct extent_buffer *node;
5466         unsigned int ext_ref;
5467         int slot;
5468         int ret;
5469         int err = 0;
5470
5471         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5472
5473         btrfs_init_path(&path);
5474         key.objectid = BTRFS_FS_TREE_OBJECTID;
5475         key.offset = 0;
5476         key.type = BTRFS_ROOT_ITEM_KEY;
5477
5478         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5479         if (ret < 0) {
5480                 err = ret;
5481                 goto out;
5482         } else if (ret > 0) {
5483                 err = -ENOENT;
5484                 goto out;
5485         }
5486
5487         while (1) {
5488                 node = path.nodes[0];
5489                 slot = path.slots[0];
5490                 btrfs_item_key_to_cpu(node, &key, slot);
5491                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5492                         goto out;
5493                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5494                     fs_root_objectid(key.objectid)) {
5495                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5496                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5497                                                                        &key);
5498                         } else {
5499                                 key.offset = (u64)-1;
5500                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5501                         }
5502
5503                         if (IS_ERR(cur_root)) {
5504                                 error("Fail to read fs/subvol tree: %lld",
5505                                       key.objectid);
5506                                 err = -EIO;
5507                                 goto next;
5508                         }
5509
5510                         ret = check_fs_root_v2(cur_root, ext_ref);
5511                         err |= ret;
5512
5513                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5514                                 btrfs_free_fs_root(cur_root);
5515                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5516                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5517                         ret = check_root_ref(tree_root, &key, node, slot);
5518                         err |= ret;
5519                 }
5520 next:
5521                 ret = btrfs_next_item(tree_root, &path);
5522                 if (ret > 0)
5523                         goto out;
5524                 if (ret < 0) {
5525                         err = ret;
5526                         goto out;
5527                 }
5528         }
5529
5530 out:
5531         btrfs_release_path(&path);
5532         return err;
5533 }
5534
5535 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5536                           struct cache_tree *root_cache)
5537 {
5538         int ret;
5539
5540         if (!ctx.progress_enabled)
5541                 fprintf(stderr, "checking fs roots\n");
5542         if (check_mode == CHECK_MODE_LOWMEM)
5543                 ret = check_fs_roots_v2(fs_info);
5544         else
5545                 ret = check_fs_roots(fs_info, root_cache);
5546
5547         return ret;
5548 }
5549
5550 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5551 {
5552         struct extent_backref *back, *tmp;
5553         struct tree_backref *tback;
5554         struct data_backref *dback;
5555         u64 found = 0;
5556         int err = 0;
5557
5558         rbtree_postorder_for_each_entry_safe(back, tmp,
5559                                              &rec->backref_tree, node) {
5560                 if (!back->found_extent_tree) {
5561                         err = 1;
5562                         if (!print_errs)
5563                                 goto out;
5564                         if (back->is_data) {
5565                                 dback = to_data_backref(back);
5566                                 fprintf(stderr, "Data backref %llu %s %llu"
5567                                         " owner %llu offset %llu num_refs %lu"
5568                                         " not found in extent tree\n",
5569                                         (unsigned long long)rec->start,
5570                                         back->full_backref ?
5571                                         "parent" : "root",
5572                                         back->full_backref ?
5573                                         (unsigned long long)dback->parent:
5574                                         (unsigned long long)dback->root,
5575                                         (unsigned long long)dback->owner,
5576                                         (unsigned long long)dback->offset,
5577                                         (unsigned long)dback->num_refs);
5578                         } else {
5579                                 tback = to_tree_backref(back);
5580                                 fprintf(stderr, "Tree backref %llu parent %llu"
5581                                         " root %llu not found in extent tree\n",
5582                                         (unsigned long long)rec->start,
5583                                         (unsigned long long)tback->parent,
5584                                         (unsigned long long)tback->root);
5585                         }
5586                 }
5587                 if (!back->is_data && !back->found_ref) {
5588                         err = 1;
5589                         if (!print_errs)
5590                                 goto out;
5591                         tback = to_tree_backref(back);
5592                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5593                                 (unsigned long long)rec->start,
5594                                 back->full_backref ? "parent" : "root",
5595                                 back->full_backref ?
5596                                 (unsigned long long)tback->parent :
5597                                 (unsigned long long)tback->root, back);
5598                 }
5599                 if (back->is_data) {
5600                         dback = to_data_backref(back);
5601                         if (dback->found_ref != dback->num_refs) {
5602                                 err = 1;
5603                                 if (!print_errs)
5604                                         goto out;
5605                                 fprintf(stderr, "Incorrect local backref count"
5606                                         " on %llu %s %llu owner %llu"
5607                                         " offset %llu found %u wanted %u back %p\n",
5608                                         (unsigned long long)rec->start,
5609                                         back->full_backref ?
5610                                         "parent" : "root",
5611                                         back->full_backref ?
5612                                         (unsigned long long)dback->parent:
5613                                         (unsigned long long)dback->root,
5614                                         (unsigned long long)dback->owner,
5615                                         (unsigned long long)dback->offset,
5616                                         dback->found_ref, dback->num_refs, back);
5617                         }
5618                         if (dback->disk_bytenr != rec->start) {
5619                                 err = 1;
5620                                 if (!print_errs)
5621                                         goto out;
5622                                 fprintf(stderr, "Backref disk bytenr does not"
5623                                         " match extent record, bytenr=%llu, "
5624                                         "ref bytenr=%llu\n",
5625                                         (unsigned long long)rec->start,
5626                                         (unsigned long long)dback->disk_bytenr);
5627                         }
5628
5629                         if (dback->bytes != rec->nr) {
5630                                 err = 1;
5631                                 if (!print_errs)
5632                                         goto out;
5633                                 fprintf(stderr, "Backref bytes do not match "
5634                                         "extent backref, bytenr=%llu, ref "
5635                                         "bytes=%llu, backref bytes=%llu\n",
5636                                         (unsigned long long)rec->start,
5637                                         (unsigned long long)rec->nr,
5638                                         (unsigned long long)dback->bytes);
5639                         }
5640                 }
5641                 if (!back->is_data) {
5642                         found += 1;
5643                 } else {
5644                         dback = to_data_backref(back);
5645                         found += dback->found_ref;
5646                 }
5647         }
5648         if (found != rec->refs) {
5649                 err = 1;
5650                 if (!print_errs)
5651                         goto out;
5652                 fprintf(stderr, "Incorrect global backref count "
5653                         "on %llu found %llu wanted %llu\n",
5654                         (unsigned long long)rec->start,
5655                         (unsigned long long)found,
5656                         (unsigned long long)rec->refs);
5657         }
5658 out:
5659         return err;
5660 }
5661
/*
 * Node destructor handed to rb_free_nodes(): free the extent_backref that
 * rb_node_to_extent_backref() yields for @node.
 */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
5668
5669 static void free_all_extent_backrefs(struct extent_record *rec)
5670 {
5671         rb_free_nodes(&rec->backref_tree, __free_one_backref);
5672 }
5673
5674 static void free_extent_record_cache(struct cache_tree *extent_cache)
5675 {
5676         struct cache_extent *cache;
5677         struct extent_record *rec;
5678
5679         while (1) {
5680                 cache = first_cache_extent(extent_cache);
5681                 if (!cache)
5682                         break;
5683                 rec = container_of(cache, struct extent_record, cache);
5684                 remove_cache_extent(extent_cache, cache);
5685                 free_all_extent_backrefs(rec);
5686                 free(rec);
5687         }
5688 }
5689
5690 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5691                                  struct extent_record *rec)
5692 {
5693         if (rec->content_checked && rec->owner_ref_checked &&
5694             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5695             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5696             !rec->bad_full_backref && !rec->crossing_stripes &&
5697             !rec->wrong_chunk_type) {
5698                 remove_cache_extent(extent_cache, &rec->cache);
5699                 free_all_extent_backrefs(rec);
5700                 list_del_init(&rec->list);
5701                 free(rec);
5702         }
5703         return 0;
5704 }
5705
5706 static int check_owner_ref(struct btrfs_root *root,
5707                             struct extent_record *rec,
5708                             struct extent_buffer *buf)
5709 {
5710         struct extent_backref *node, *tmp;
5711         struct tree_backref *back;
5712         struct btrfs_root *ref_root;
5713         struct btrfs_key key;
5714         struct btrfs_path path;
5715         struct extent_buffer *parent;
5716         int level;
5717         int found = 0;
5718         int ret;
5719
5720         rbtree_postorder_for_each_entry_safe(node, tmp,
5721                                              &rec->backref_tree, node) {
5722                 if (node->is_data)
5723                         continue;
5724                 if (!node->found_ref)
5725                         continue;
5726                 if (node->full_backref)
5727                         continue;
5728                 back = to_tree_backref(node);
5729                 if (btrfs_header_owner(buf) == back->root)
5730                         return 0;
5731         }
5732         BUG_ON(rec->is_root);
5733
5734         /* try to find the block by search corresponding fs tree */
5735         key.objectid = btrfs_header_owner(buf);
5736         key.type = BTRFS_ROOT_ITEM_KEY;
5737         key.offset = (u64)-1;
5738
5739         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5740         if (IS_ERR(ref_root))
5741                 return 1;
5742
5743         level = btrfs_header_level(buf);
5744         if (level == 0)
5745                 btrfs_item_key_to_cpu(buf, &key, 0);
5746         else
5747                 btrfs_node_key_to_cpu(buf, &key, 0);
5748
5749         btrfs_init_path(&path);
5750         path.lowest_level = level + 1;
5751         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5752         if (ret < 0)
5753                 return 0;
5754
5755         parent = path.nodes[level + 1];
5756         if (parent && buf->start == btrfs_node_blockptr(parent,
5757                                                         path.slots[level + 1]))
5758                 found = 1;
5759
5760         btrfs_release_path(&path);
5761         return found ? 0 : 1;
5762 }
5763
5764 static int is_extent_tree_record(struct extent_record *rec)
5765 {
5766         struct extent_backref *node, *tmp;
5767         struct tree_backref *back;
5768         int is_extent = 0;
5769
5770         rbtree_postorder_for_each_entry_safe(node, tmp,
5771                                              &rec->backref_tree, node) {
5772                 if (node->is_data)
5773                         return 0;
5774                 back = to_tree_backref(node);
5775                 if (node->full_backref)
5776                         return 0;
5777                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5778                         is_extent = 1;
5779         }
5780         return is_extent;
5781 }
5782
5783
5784 static int record_bad_block_io(struct btrfs_fs_info *info,
5785                                struct cache_tree *extent_cache,
5786                                u64 start, u64 len)
5787 {
5788         struct extent_record *rec;
5789         struct cache_extent *cache;
5790         struct btrfs_key key;
5791
5792         cache = lookup_cache_extent(extent_cache, start, len);
5793         if (!cache)
5794                 return 0;
5795
5796         rec = container_of(cache, struct extent_record, cache);
5797         if (!is_extent_tree_record(rec))
5798                 return 0;
5799
5800         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5801         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5802 }
5803
5804 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5805                        struct extent_buffer *buf, int slot)
5806 {
5807         if (btrfs_header_level(buf)) {
5808                 struct btrfs_key_ptr ptr1, ptr2;
5809
5810                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5811                                    sizeof(struct btrfs_key_ptr));
5812                 read_extent_buffer(buf, &ptr2,
5813                                    btrfs_node_key_ptr_offset(slot + 1),
5814                                    sizeof(struct btrfs_key_ptr));
5815                 write_extent_buffer(buf, &ptr1,
5816                                     btrfs_node_key_ptr_offset(slot + 1),
5817                                     sizeof(struct btrfs_key_ptr));
5818                 write_extent_buffer(buf, &ptr2,
5819                                     btrfs_node_key_ptr_offset(slot),
5820                                     sizeof(struct btrfs_key_ptr));
5821                 if (slot == 0) {
5822                         struct btrfs_disk_key key;
5823                         btrfs_node_key(buf, &key, 0);
5824                         btrfs_fixup_low_keys(root, path, &key,
5825                                              btrfs_header_level(buf) + 1);
5826                 }
5827         } else {
5828                 struct btrfs_item *item1, *item2;
5829                 struct btrfs_key k1, k2;
5830                 char *item1_data, *item2_data;
5831                 u32 item1_offset, item2_offset, item1_size, item2_size;
5832
5833                 item1 = btrfs_item_nr(slot);
5834                 item2 = btrfs_item_nr(slot + 1);
5835                 btrfs_item_key_to_cpu(buf, &k1, slot);
5836                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5837                 item1_offset = btrfs_item_offset(buf, item1);
5838                 item2_offset = btrfs_item_offset(buf, item2);
5839                 item1_size = btrfs_item_size(buf, item1);
5840                 item2_size = btrfs_item_size(buf, item2);
5841
5842                 item1_data = malloc(item1_size);
5843                 if (!item1_data)
5844                         return -ENOMEM;
5845                 item2_data = malloc(item2_size);
5846                 if (!item2_data) {
5847                         free(item1_data);
5848                         return -ENOMEM;
5849                 }
5850
5851                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5852                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5853
5854                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5855                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5856                 free(item1_data);
5857                 free(item2_data);
5858
5859                 btrfs_set_item_offset(buf, item1, item2_offset);
5860                 btrfs_set_item_offset(buf, item2, item1_offset);
5861                 btrfs_set_item_size(buf, item1, item2_size);
5862                 btrfs_set_item_size(buf, item2, item1_size);
5863
5864                 path->slots[0] = slot;
5865                 btrfs_set_item_key_unsafe(root, path, &k2);
5866                 path->slots[0] = slot + 1;
5867                 btrfs_set_item_key_unsafe(root, path, &k1);
5868         }
5869         return 0;
5870 }
5871
5872 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5873 {
5874         struct extent_buffer *buf;
5875         struct btrfs_key k1, k2;
5876         int i;
5877         int level = path->lowest_level;
5878         int ret = -EIO;
5879
5880         buf = path->nodes[level];
5881         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5882                 if (level) {
5883                         btrfs_node_key_to_cpu(buf, &k1, i);
5884                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5885                 } else {
5886                         btrfs_item_key_to_cpu(buf, &k1, i);
5887                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5888                 }
5889                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5890                         continue;
5891                 ret = swap_values(root, path, buf, i);
5892                 if (ret)
5893                         break;
5894                 btrfs_mark_buffer_dirty(buf);
5895                 i = 0;
5896         }
5897         return ret;
5898 }
5899
5900 static int delete_bogus_item(struct btrfs_root *root,
5901                              struct btrfs_path *path,
5902                              struct extent_buffer *buf, int slot)
5903 {
5904         struct btrfs_key key;
5905         int nritems = btrfs_header_nritems(buf);
5906
5907         btrfs_item_key_to_cpu(buf, &key, slot);
5908
5909         /* These are all the keys we can deal with missing. */
5910         if (key.type != BTRFS_DIR_INDEX_KEY &&
5911             key.type != BTRFS_EXTENT_ITEM_KEY &&
5912             key.type != BTRFS_METADATA_ITEM_KEY &&
5913             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5914             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5915                 return -1;
5916
5917         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5918                (unsigned long long)key.objectid, key.type,
5919                (unsigned long long)key.offset, slot, buf->start);
5920         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5921                               btrfs_item_nr_offset(slot + 1),
5922                               sizeof(struct btrfs_item) *
5923                               (nritems - slot - 1));
5924         btrfs_set_header_nritems(buf, nritems - 1);
5925         if (slot == 0) {
5926                 struct btrfs_disk_key disk_key;
5927
5928                 btrfs_item_key(buf, &disk_key, 0);
5929                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5930         }
5931         btrfs_mark_buffer_dirty(buf);
5932         return 0;
5933 }
5934
5935 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5936 {
5937         struct extent_buffer *buf;
5938         int i;
5939         int ret = 0;
5940
5941         /* We should only get this for leaves */
5942         BUG_ON(path->lowest_level);
5943         buf = path->nodes[0];
5944 again:
5945         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5946                 unsigned int shift = 0, offset;
5947
5948                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5949                     BTRFS_LEAF_DATA_SIZE(root)) {
5950                         if (btrfs_item_end_nr(buf, i) >
5951                             BTRFS_LEAF_DATA_SIZE(root)) {
5952                                 ret = delete_bogus_item(root, path, buf, i);
5953                                 if (!ret)
5954                                         goto again;
5955                                 fprintf(stderr, "item is off the end of the "
5956                                         "leaf, can't fix\n");
5957                                 ret = -EIO;
5958                                 break;
5959                         }
5960                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5961                                 btrfs_item_end_nr(buf, i);
5962                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5963                            btrfs_item_offset_nr(buf, i - 1)) {
5964                         if (btrfs_item_end_nr(buf, i) >
5965                             btrfs_item_offset_nr(buf, i - 1)) {
5966                                 ret = delete_bogus_item(root, path, buf, i);
5967                                 if (!ret)
5968                                         goto again;
5969                                 fprintf(stderr, "items overlap, can't fix\n");
5970                                 ret = -EIO;
5971                                 break;
5972                         }
5973                         shift = btrfs_item_offset_nr(buf, i - 1) -
5974                                 btrfs_item_end_nr(buf, i);
5975                 }
5976                 if (!shift)
5977                         continue;
5978
5979                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5980                        i, shift, (unsigned long long)buf->start);
5981                 offset = btrfs_item_offset_nr(buf, i);
5982                 memmove_extent_buffer(buf,
5983                                       btrfs_leaf_data(buf) + offset + shift,
5984                                       btrfs_leaf_data(buf) + offset,
5985                                       btrfs_item_size_nr(buf, i));
5986                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5987                                       offset + shift);
5988                 btrfs_mark_buffer_dirty(buf);
5989         }
5990
5991         /*
5992          * We may have moved things, in which case we want to exit so we don't
5993          * write those changes out.  Once we have proper abort functionality in
5994          * progs this can be changed to something nicer.
5995          */
5996         BUG_ON(ret);
5997         return ret;
5998 }
5999
6000 /*
6001  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6002  * then just return -EIO.
6003  */
6004 static int try_to_fix_bad_block(struct btrfs_root *root,
6005                                 struct extent_buffer *buf,
6006                                 enum btrfs_tree_block_status status)
6007 {
6008         struct btrfs_trans_handle *trans;
6009         struct ulist *roots;
6010         struct ulist_node *node;
6011         struct btrfs_root *search_root;
6012         struct btrfs_path path;
6013         struct ulist_iterator iter;
6014         struct btrfs_key root_key, key;
6015         int ret;
6016
6017         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6018             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6019                 return -EIO;
6020
6021         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6022         if (ret)
6023                 return -EIO;
6024
6025         btrfs_init_path(&path);
6026         ULIST_ITER_INIT(&iter);
6027         while ((node = ulist_next(roots, &iter))) {
6028                 root_key.objectid = node->val;
6029                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6030                 root_key.offset = (u64)-1;
6031
6032                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6033                 if (IS_ERR(root)) {
6034                         ret = -EIO;
6035                         break;
6036                 }
6037
6038
6039                 trans = btrfs_start_transaction(search_root, 0);
6040                 if (IS_ERR(trans)) {
6041                         ret = PTR_ERR(trans);
6042                         break;
6043                 }
6044
6045                 path.lowest_level = btrfs_header_level(buf);
6046                 path.skip_check_block = 1;
6047                 if (path.lowest_level)
6048                         btrfs_node_key_to_cpu(buf, &key, 0);
6049                 else
6050                         btrfs_item_key_to_cpu(buf, &key, 0);
6051                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6052                 if (ret) {
6053                         ret = -EIO;
6054                         btrfs_commit_transaction(trans, search_root);
6055                         break;
6056                 }
6057                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6058                         ret = fix_key_order(search_root, &path);
6059                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6060                         ret = fix_item_offset(search_root, &path);
6061                 if (ret) {
6062                         btrfs_commit_transaction(trans, search_root);
6063                         break;
6064                 }
6065                 btrfs_release_path(&path);
6066                 btrfs_commit_transaction(trans, search_root);
6067         }
6068         ulist_free(roots);
6069         btrfs_release_path(&path);
6070         return ret;
6071 }
6072
6073 static int check_block(struct btrfs_root *root,
6074                        struct cache_tree *extent_cache,
6075                        struct extent_buffer *buf, u64 flags)
6076 {
6077         struct extent_record *rec;
6078         struct cache_extent *cache;
6079         struct btrfs_key key;
6080         enum btrfs_tree_block_status status;
6081         int ret = 0;
6082         int level;
6083
6084         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6085         if (!cache)
6086                 return 1;
6087         rec = container_of(cache, struct extent_record, cache);
6088         rec->generation = btrfs_header_generation(buf);
6089
6090         level = btrfs_header_level(buf);
6091         if (btrfs_header_nritems(buf) > 0) {
6092
6093                 if (level == 0)
6094                         btrfs_item_key_to_cpu(buf, &key, 0);
6095                 else
6096                         btrfs_node_key_to_cpu(buf, &key, 0);
6097
6098                 rec->info_objectid = key.objectid;
6099         }
6100         rec->info_level = level;
6101
6102         if (btrfs_is_leaf(buf))
6103                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6104         else
6105                 status = btrfs_check_node(root, &rec->parent_key, buf);
6106
6107         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6108                 if (repair)
6109                         status = try_to_fix_bad_block(root, buf, status);
6110                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6111                         ret = -EIO;
6112                         fprintf(stderr, "bad block %llu\n",
6113                                 (unsigned long long)buf->start);
6114                 } else {
6115                         /*
6116                          * Signal to callers we need to start the scan over
6117                          * again since we'll have cowed blocks.
6118                          */
6119                         ret = -EAGAIN;
6120                 }
6121         } else {
6122                 rec->content_checked = 1;
6123                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6124                         rec->owner_ref_checked = 1;
6125                 else {
6126                         ret = check_owner_ref(root, rec, buf);
6127                         if (!ret)
6128                                 rec->owner_ref_checked = 1;
6129                 }
6130         }
6131         if (!ret)
6132                 maybe_free_extent_rec(extent_cache, rec);
6133         return ret;
6134 }
6135
#if 0
/*
 * Compiled-out variant that walks the rec->backrefs list looking for a tree
 * backref.
 *
 * With a non-zero @parent only full backrefs are considered and matched on
 * their parent; otherwise only non-full backrefs are considered and matched
 * on @root.  Data backrefs are always skipped.  Returns the matching backref
 * or NULL.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs; pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;

		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
6165
6166 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6167                                                 u64 parent, u64 root)
6168 {
6169         struct tree_backref *ref = malloc(sizeof(*ref));
6170
6171         if (!ref)
6172                 return NULL;
6173         memset(&ref->node, 0, sizeof(ref->node));
6174         if (parent > 0) {
6175                 ref->parent = parent;
6176                 ref->node.full_backref = 1;
6177         } else {
6178                 ref->root = root;
6179                 ref->node.full_backref = 0;
6180         }
6181
6182         return ref;
6183 }
6184
#if 0
/*
 * Compiled-out variant that walks the rec->backrefs list looking for a data
 * backref.
 *
 * With a non-zero @parent only full backrefs are considered and matched on
 * their parent.  Otherwise only non-full backrefs are considered and they
 * must match @root, @owner and @offset; in addition, when both @found_ref
 * and the candidate's found_ref are set, the candidate must also agree on
 * @bytes and @disk_bytenr.  Returns the matching backref or NULL.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs; pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;

		back = to_data_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		     back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
6223
6224 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6225                                                 u64 parent, u64 root,
6226                                                 u64 owner, u64 offset,
6227                                                 u64 max_size)
6228 {
6229         struct data_backref *ref = malloc(sizeof(*ref));
6230
6231         if (!ref)
6232                 return NULL;
6233         memset(&ref->node, 0, sizeof(ref->node));
6234         ref->node.is_data = 1;
6235
6236         if (parent > 0) {
6237                 ref->parent = parent;
6238                 ref->owner = 0;
6239                 ref->offset = 0;
6240                 ref->node.full_backref = 1;
6241         } else {
6242                 ref->root = root;
6243                 ref->owner = owner;
6244                 ref->offset = offset;
6245                 ref->node.full_backref = 0;
6246         }
6247         ref->bytes = max_size;
6248         ref->found_ref = 0;
6249         ref->num_refs = 0;
6250         if (max_size > rec->max_size)
6251                 rec->max_size = max_size;
6252         return ref;
6253 }
6254
6255 /* Check if the type of extent matches with its chunk */
6256 static void check_extent_type(struct extent_record *rec)
6257 {
6258         struct btrfs_block_group_cache *bg_cache;
6259
6260         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6261         if (!bg_cache)
6262                 return;
6263
6264         /* data extent, check chunk directly*/
6265         if (!rec->metadata) {
6266                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6267                         rec->wrong_chunk_type = 1;
6268                 return;
6269         }
6270
6271         /* metadata extent, check the obvious case first */
6272         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6273                                  BTRFS_BLOCK_GROUP_METADATA))) {
6274                 rec->wrong_chunk_type = 1;
6275                 return;
6276         }
6277
6278         /*
6279          * Check SYSTEM extent, as it's also marked as metadata, we can only
6280          * make sure it's a SYSTEM extent by its backref
6281          */
6282         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6283                 struct extent_backref *node;
6284                 struct tree_backref *tback;
6285                 u64 bg_type;
6286
6287                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6288                 if (node->is_data) {
6289                         /* tree block shouldn't have data backref */
6290                         rec->wrong_chunk_type = 1;
6291                         return;
6292                 }
6293                 tback = container_of(node, struct tree_backref, node);
6294
6295                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6296                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6297                 else
6298                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6299                 if (!(bg_cache->flags & bg_type))
6300                         rec->wrong_chunk_type = 1;
6301         }
6302 }
6303
6304 /*
6305  * Allocate a new extent record, fill default values from @tmpl and insert int
6306  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6307  * the cache, otherwise it fails.
6308  */
6309 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6310                 struct extent_record *tmpl)
6311 {
6312         struct extent_record *rec;
6313         int ret = 0;
6314
6315         BUG_ON(tmpl->max_size == 0);
6316         rec = malloc(sizeof(*rec));
6317         if (!rec)
6318                 return -ENOMEM;
6319         rec->start = tmpl->start;
6320         rec->max_size = tmpl->max_size;
6321         rec->nr = max(tmpl->nr, tmpl->max_size);
6322         rec->found_rec = tmpl->found_rec;
6323         rec->content_checked = tmpl->content_checked;
6324         rec->owner_ref_checked = tmpl->owner_ref_checked;
6325         rec->num_duplicates = 0;
6326         rec->metadata = tmpl->metadata;
6327         rec->flag_block_full_backref = FLAG_UNSET;
6328         rec->bad_full_backref = 0;
6329         rec->crossing_stripes = 0;
6330         rec->wrong_chunk_type = 0;
6331         rec->is_root = tmpl->is_root;
6332         rec->refs = tmpl->refs;
6333         rec->extent_item_refs = tmpl->extent_item_refs;
6334         rec->parent_generation = tmpl->parent_generation;
6335         INIT_LIST_HEAD(&rec->backrefs);
6336         INIT_LIST_HEAD(&rec->dups);
6337         INIT_LIST_HEAD(&rec->list);
6338         rec->backref_tree = RB_ROOT;
6339         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6340         rec->cache.start = tmpl->start;
6341         rec->cache.size = tmpl->nr;
6342         ret = insert_cache_extent(extent_cache, &rec->cache);
6343         if (ret) {
6344                 free(rec);
6345                 return ret;
6346         }
6347         bytes_used += rec->nr;
6348
6349         if (tmpl->metadata)
6350                 rec->crossing_stripes = check_crossing_stripes(global_info,
6351                                 rec->start, global_info->nodesize);
6352         check_extent_type(rec);
6353         return ret;
6354 }
6355
6356 /*
6357  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6358  * some are hints:
6359  * - refs              - if found, increase refs
6360  * - is_root           - if found, set
6361  * - content_checked   - if found, set
6362  * - owner_ref_checked - if found, set
6363  *
6364  * If not found, create a new one, initialize and insert.
6365  */
6366 static int add_extent_rec(struct cache_tree *extent_cache,
6367                 struct extent_record *tmpl)
6368 {
6369         struct extent_record *rec;
6370         struct cache_extent *cache;
6371         int ret = 0;
6372         int dup = 0;
6373
6374         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6375         if (cache) {
6376                 rec = container_of(cache, struct extent_record, cache);
6377                 if (tmpl->refs)
6378                         rec->refs++;
6379                 if (rec->nr == 1)
6380                         rec->nr = max(tmpl->nr, tmpl->max_size);
6381
6382                 /*
6383                  * We need to make sure to reset nr to whatever the extent
6384                  * record says was the real size, this way we can compare it to
6385                  * the backrefs.
6386                  */
6387                 if (tmpl->found_rec) {
6388                         if (tmpl->start != rec->start || rec->found_rec) {
6389                                 struct extent_record *tmp;
6390
6391                                 dup = 1;
6392                                 if (list_empty(&rec->list))
6393                                         list_add_tail(&rec->list,
6394                                                       &duplicate_extents);
6395
6396                                 /*
6397                                  * We have to do this song and dance in case we
6398                                  * find an extent record that falls inside of
6399                                  * our current extent record but does not have
6400                                  * the same objectid.
6401                                  */
6402                                 tmp = malloc(sizeof(*tmp));
6403                                 if (!tmp)
6404                                         return -ENOMEM;
6405                                 tmp->start = tmpl->start;
6406                                 tmp->max_size = tmpl->max_size;
6407                                 tmp->nr = tmpl->nr;
6408                                 tmp->found_rec = 1;
6409                                 tmp->metadata = tmpl->metadata;
6410                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6411                                 INIT_LIST_HEAD(&tmp->list);
6412                                 list_add_tail(&tmp->list, &rec->dups);
6413                                 rec->num_duplicates++;
6414                         } else {
6415                                 rec->nr = tmpl->nr;
6416                                 rec->found_rec = 1;
6417                         }
6418                 }
6419
6420                 if (tmpl->extent_item_refs && !dup) {
6421                         if (rec->extent_item_refs) {
6422                                 fprintf(stderr, "block %llu rec "
6423                                         "extent_item_refs %llu, passed %llu\n",
6424                                         (unsigned long long)tmpl->start,
6425                                         (unsigned long long)
6426                                                         rec->extent_item_refs,
6427                                         (unsigned long long)tmpl->extent_item_refs);
6428                         }
6429                         rec->extent_item_refs = tmpl->extent_item_refs;
6430                 }
6431                 if (tmpl->is_root)
6432                         rec->is_root = 1;
6433                 if (tmpl->content_checked)
6434                         rec->content_checked = 1;
6435                 if (tmpl->owner_ref_checked)
6436                         rec->owner_ref_checked = 1;
6437                 memcpy(&rec->parent_key, &tmpl->parent_key,
6438                                 sizeof(tmpl->parent_key));
6439                 if (tmpl->parent_generation)
6440                         rec->parent_generation = tmpl->parent_generation;
6441                 if (rec->max_size < tmpl->max_size)
6442                         rec->max_size = tmpl->max_size;
6443
6444                 /*
6445                  * A metadata extent can't cross stripe_len boundary, otherwise
6446                  * kernel scrub won't be able to handle it.
6447                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6448                  * it.
6449                  */
6450                 if (tmpl->metadata)
6451                         rec->crossing_stripes = check_crossing_stripes(
6452                                         global_info, rec->start,
6453                                         global_info->nodesize);
6454                 check_extent_type(rec);
6455                 maybe_free_extent_rec(extent_cache, rec);
6456                 return ret;
6457         }
6458
6459         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6460
6461         return ret;
6462 }
6463
6464 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6465                             u64 parent, u64 root, int found_ref)
6466 {
6467         struct extent_record *rec;
6468         struct tree_backref *back;
6469         struct cache_extent *cache;
6470         int ret;
6471         bool insert = false;
6472
6473         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6474         if (!cache) {
6475                 struct extent_record tmpl;
6476
6477                 memset(&tmpl, 0, sizeof(tmpl));
6478                 tmpl.start = bytenr;
6479                 tmpl.nr = 1;
6480                 tmpl.metadata = 1;
6481                 tmpl.max_size = 1;
6482
6483                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6484                 if (ret)
6485                         return ret;
6486
6487                 /* really a bug in cache_extent implement now */
6488                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6489                 if (!cache)
6490                         return -ENOENT;
6491         }
6492
6493         rec = container_of(cache, struct extent_record, cache);
6494         if (rec->start != bytenr) {
6495                 /*
6496                  * Several cause, from unaligned bytenr to over lapping extents
6497                  */
6498                 return -EEXIST;
6499         }
6500
6501         back = find_tree_backref(rec, parent, root);
6502         if (!back) {
6503                 back = alloc_tree_backref(rec, parent, root);
6504                 if (!back)
6505                         return -ENOMEM;
6506                 insert = true;
6507         }
6508
6509         if (found_ref) {
6510                 if (back->node.found_ref) {
6511                         fprintf(stderr, "Extent back ref already exists "
6512                                 "for %llu parent %llu root %llu \n",
6513                                 (unsigned long long)bytenr,
6514                                 (unsigned long long)parent,
6515                                 (unsigned long long)root);
6516                 }
6517                 back->node.found_ref = 1;
6518         } else {
6519                 if (back->node.found_extent_tree) {
6520                         fprintf(stderr, "Extent back ref already exists "
6521                                 "for %llu parent %llu root %llu \n",
6522                                 (unsigned long long)bytenr,
6523                                 (unsigned long long)parent,
6524                                 (unsigned long long)root);
6525                 }
6526                 back->node.found_extent_tree = 1;
6527         }
6528         if (insert)
6529                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6530                         compare_extent_backref));
6531         check_extent_type(rec);
6532         maybe_free_extent_rec(extent_cache, rec);
6533         return 0;
6534 }
6535
6536 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6537                             u64 parent, u64 root, u64 owner, u64 offset,
6538                             u32 num_refs, int found_ref, u64 max_size)
6539 {
6540         struct extent_record *rec;
6541         struct data_backref *back;
6542         struct cache_extent *cache;
6543         int ret;
6544         bool insert = false;
6545
6546         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6547         if (!cache) {
6548                 struct extent_record tmpl;
6549
6550                 memset(&tmpl, 0, sizeof(tmpl));
6551                 tmpl.start = bytenr;
6552                 tmpl.nr = 1;
6553                 tmpl.max_size = max_size;
6554
6555                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6556                 if (ret)
6557                         return ret;
6558
6559                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6560                 if (!cache)
6561                         abort();
6562         }
6563
6564         rec = container_of(cache, struct extent_record, cache);
6565         if (rec->max_size < max_size)
6566                 rec->max_size = max_size;
6567
6568         /*
6569          * If found_ref is set then max_size is the real size and must match the
6570          * existing refs.  So if we have already found a ref then we need to
6571          * make sure that this ref matches the existing one, otherwise we need
6572          * to add a new backref so we can notice that the backrefs don't match
6573          * and we need to figure out who is telling the truth.  This is to
6574          * account for that awful fsync bug I introduced where we'd end up with
6575          * a btrfs_file_extent_item that would have its length include multiple
6576          * prealloc extents or point inside of a prealloc extent.
6577          */
6578         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6579                                  bytenr, max_size);
6580         if (!back) {
6581                 back = alloc_data_backref(rec, parent, root, owner, offset,
6582                                           max_size);
6583                 BUG_ON(!back);
6584                 insert = true;
6585         }
6586
6587         if (found_ref) {
6588                 BUG_ON(num_refs != 1);
6589                 if (back->node.found_ref)
6590                         BUG_ON(back->bytes != max_size);
6591                 back->node.found_ref = 1;
6592                 back->found_ref += 1;
6593                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6594                         back->bytes = max_size;
6595                         back->disk_bytenr = bytenr;
6596
6597                         /* Need to reinsert if not already in the tree */
6598                         if (!insert) {
6599                                 rb_erase(&back->node.node, &rec->backref_tree);
6600                                 insert = true;
6601                         }
6602                 }
6603                 rec->refs += 1;
6604                 rec->content_checked = 1;
6605                 rec->owner_ref_checked = 1;
6606         } else {
6607                 if (back->node.found_extent_tree) {
6608                         fprintf(stderr, "Extent back ref already exists "
6609                                 "for %llu parent %llu root %llu "
6610                                 "owner %llu offset %llu num_refs %lu\n",
6611                                 (unsigned long long)bytenr,
6612                                 (unsigned long long)parent,
6613                                 (unsigned long long)root,
6614                                 (unsigned long long)owner,
6615                                 (unsigned long long)offset,
6616                                 (unsigned long)num_refs);
6617                 }
6618                 back->num_refs = num_refs;
6619                 back->node.found_extent_tree = 1;
6620         }
6621         if (insert)
6622                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6623                         compare_extent_backref));
6624
6625         maybe_free_extent_rec(extent_cache, rec);
6626         return 0;
6627 }
6628
6629 static int add_pending(struct cache_tree *pending,
6630                        struct cache_tree *seen, u64 bytenr, u32 size)
6631 {
6632         int ret;
6633         ret = add_cache_extent(seen, bytenr, size);
6634         if (ret)
6635                 return ret;
6636         add_cache_extent(pending, bytenr, size);
6637         return 0;
6638 }
6639
6640 static int pick_next_pending(struct cache_tree *pending,
6641                         struct cache_tree *reada,
6642                         struct cache_tree *nodes,
6643                         u64 last, struct block_info *bits, int bits_nr,
6644                         int *reada_bits)
6645 {
6646         unsigned long node_start = last;
6647         struct cache_extent *cache;
6648         int ret;
6649
6650         cache = search_cache_extent(reada, 0);
6651         if (cache) {
6652                 bits[0].start = cache->start;
6653                 bits[0].size = cache->size;
6654                 *reada_bits = 1;
6655                 return 1;
6656         }
6657         *reada_bits = 0;
6658         if (node_start > 32768)
6659                 node_start -= 32768;
6660
6661         cache = search_cache_extent(nodes, node_start);
6662         if (!cache)
6663                 cache = search_cache_extent(nodes, 0);
6664
6665         if (!cache) {
6666                  cache = search_cache_extent(pending, 0);
6667                  if (!cache)
6668                          return 0;
6669                  ret = 0;
6670                  do {
6671                          bits[ret].start = cache->start;
6672                          bits[ret].size = cache->size;
6673                          cache = next_cache_extent(cache);
6674                          ret++;
6675                  } while (cache && ret < bits_nr);
6676                  return ret;
6677         }
6678
6679         ret = 0;
6680         do {
6681                 bits[ret].start = cache->start;
6682                 bits[ret].size = cache->size;
6683                 cache = next_cache_extent(cache);
6684                 ret++;
6685         } while (cache && ret < bits_nr);
6686
6687         if (bits_nr - ret > 8) {
6688                 u64 lookup = bits[0].start + bits[0].size;
6689                 struct cache_extent *next;
6690                 next = search_cache_extent(pending, lookup);
6691                 while(next) {
6692                         if (next->start - lookup > 32768)
6693                                 break;
6694                         bits[ret].start = next->start;
6695                         bits[ret].size = next->size;
6696                         lookup = next->start + next->size;
6697                         ret++;
6698                         if (ret == bits_nr)
6699                                 break;
6700                         next = next_cache_extent(next);
6701                         if (!next)
6702                                 break;
6703                 }
6704         }
6705         return ret;
6706 }
6707
6708 static void free_chunk_record(struct cache_extent *cache)
6709 {
6710         struct chunk_record *rec;
6711
6712         rec = container_of(cache, struct chunk_record, cache);
6713         list_del_init(&rec->list);
6714         list_del_init(&rec->dextents);
6715         free(rec);
6716 }
6717
6718 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6719 {
6720         cache_tree_free_extents(chunk_cache, free_chunk_record);
6721 }
6722
6723 static void free_device_record(struct rb_node *node)
6724 {
6725         struct device_record *rec;
6726
6727         rec = container_of(node, struct device_record, node);
6728         free(rec);
6729 }
6730
6731 FREE_RB_BASED_TREE(device_cache, free_device_record);
6732
6733 int insert_block_group_record(struct block_group_tree *tree,
6734                               struct block_group_record *bg_rec)
6735 {
6736         int ret;
6737
6738         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6739         if (ret)
6740                 return ret;
6741
6742         list_add_tail(&bg_rec->list, &tree->block_groups);
6743         return 0;
6744 }
6745
6746 static void free_block_group_record(struct cache_extent *cache)
6747 {
6748         struct block_group_record *rec;
6749
6750         rec = container_of(cache, struct block_group_record, cache);
6751         list_del_init(&rec->list);
6752         free(rec);
6753 }
6754
6755 void free_block_group_tree(struct block_group_tree *tree)
6756 {
6757         cache_tree_free_extents(&tree->tree, free_block_group_record);
6758 }
6759
6760 int insert_device_extent_record(struct device_extent_tree *tree,
6761                                 struct device_extent_record *de_rec)
6762 {
6763         int ret;
6764
6765         /*
6766          * Device extent is a bit different from the other extents, because
6767          * the extents which belong to the different devices may have the
6768          * same start and size, so we need use the special extent cache
6769          * search/insert functions.
6770          */
6771         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6772         if (ret)
6773                 return ret;
6774
6775         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6776         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6777         return 0;
6778 }
6779
6780 static void free_device_extent_record(struct cache_extent *cache)
6781 {
6782         struct device_extent_record *rec;
6783
6784         rec = container_of(cache, struct device_extent_record, cache);
6785         if (!list_empty(&rec->chunk_list))
6786                 list_del_init(&rec->chunk_list);
6787         if (!list_empty(&rec->device_list))
6788                 list_del_init(&rec->device_list);
6789         free(rec);
6790 }
6791
6792 void free_device_extent_tree(struct device_extent_tree *tree)
6793 {
6794         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6795 }
6796
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the ref count.
 * The item key supplies the extent bytenr (objectid) and the parent
 * (offset).  Returns the result of the backref helper that was called.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6817
6818 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6819                                             struct btrfs_key *key,
6820                                             int slot)
6821 {
6822         struct btrfs_chunk *ptr;
6823         struct chunk_record *rec;
6824         int num_stripes, i;
6825
6826         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6827         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6828
6829         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6830         if (!rec) {
6831                 fprintf(stderr, "memory allocation failed\n");
6832                 exit(-1);
6833         }
6834
6835         INIT_LIST_HEAD(&rec->list);
6836         INIT_LIST_HEAD(&rec->dextents);
6837         rec->bg_rec = NULL;
6838
6839         rec->cache.start = key->offset;
6840         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6841
6842         rec->generation = btrfs_header_generation(leaf);
6843
6844         rec->objectid = key->objectid;
6845         rec->type = key->type;
6846         rec->offset = key->offset;
6847
6848         rec->length = rec->cache.size;
6849         rec->owner = btrfs_chunk_owner(leaf, ptr);
6850         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6851         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6852         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6853         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6854         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6855         rec->num_stripes = num_stripes;
6856         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6857
6858         for (i = 0; i < rec->num_stripes; ++i) {
6859                 rec->stripes[i].devid =
6860                         btrfs_stripe_devid_nr(leaf, ptr, i);
6861                 rec->stripes[i].offset =
6862                         btrfs_stripe_offset_nr(leaf, ptr, i);
6863                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6864                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6865                                 BTRFS_UUID_SIZE);
6866         }
6867
6868         return rec;
6869 }
6870
6871 static int process_chunk_item(struct cache_tree *chunk_cache,
6872                               struct btrfs_key *key, struct extent_buffer *eb,
6873                               int slot)
6874 {
6875         struct chunk_record *rec;
6876         struct btrfs_chunk *chunk;
6877         int ret = 0;
6878
6879         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6880         /*
6881          * Do extra check for this chunk item,
6882          *
6883          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6884          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6885          * and owner<->key_type check.
6886          */
6887         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6888                                       key->offset);
6889         if (ret < 0) {
6890                 error("chunk(%llu, %llu) is not valid, ignore it",
6891                       key->offset, btrfs_chunk_length(eb, chunk));
6892                 return 0;
6893         }
6894         rec = btrfs_new_chunk_record(eb, key, slot);
6895         ret = insert_cache_extent(chunk_cache, &rec->cache);
6896         if (ret) {
6897                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6898                         rec->offset, rec->length);
6899                 free(rec);
6900         }
6901
6902         return ret;
6903 }
6904
6905 static int process_device_item(struct rb_root *dev_cache,
6906                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6907 {
6908         struct btrfs_dev_item *ptr;
6909         struct device_record *rec;
6910         int ret = 0;
6911
6912         ptr = btrfs_item_ptr(eb,
6913                 slot, struct btrfs_dev_item);
6914
6915         rec = malloc(sizeof(*rec));
6916         if (!rec) {
6917                 fprintf(stderr, "memory allocation failed\n");
6918                 return -ENOMEM;
6919         }
6920
6921         rec->devid = key->offset;
6922         rec->generation = btrfs_header_generation(eb);
6923
6924         rec->objectid = key->objectid;
6925         rec->type = key->type;
6926         rec->offset = key->offset;
6927
6928         rec->devid = btrfs_device_id(eb, ptr);
6929         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6930         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6931
6932         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6933         if (ret) {
6934                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6935                 free(rec);
6936         }
6937
6938         return ret;
6939 }
6940
6941 struct block_group_record *
6942 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6943                              int slot)
6944 {
6945         struct btrfs_block_group_item *ptr;
6946         struct block_group_record *rec;
6947
6948         rec = calloc(1, sizeof(*rec));
6949         if (!rec) {
6950                 fprintf(stderr, "memory allocation failed\n");
6951                 exit(-1);
6952         }
6953
6954         rec->cache.start = key->objectid;
6955         rec->cache.size = key->offset;
6956
6957         rec->generation = btrfs_header_generation(leaf);
6958
6959         rec->objectid = key->objectid;
6960         rec->type = key->type;
6961         rec->offset = key->offset;
6962
6963         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6964         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6965
6966         INIT_LIST_HEAD(&rec->list);
6967
6968         return rec;
6969 }
6970
6971 static int process_block_group_item(struct block_group_tree *block_group_cache,
6972                                     struct btrfs_key *key,
6973                                     struct extent_buffer *eb, int slot)
6974 {
6975         struct block_group_record *rec;
6976         int ret = 0;
6977
6978         rec = btrfs_new_block_group_record(eb, key, slot);
6979         ret = insert_block_group_record(block_group_cache, rec);
6980         if (ret) {
6981                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6982                         rec->objectid, rec->offset);
6983                 free(rec);
6984         }
6985
6986         return ret;
6987 }
6988
6989 struct device_extent_record *
6990 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6991                                struct btrfs_key *key, int slot)
6992 {
6993         struct device_extent_record *rec;
6994         struct btrfs_dev_extent *ptr;
6995
6996         rec = calloc(1, sizeof(*rec));
6997         if (!rec) {
6998                 fprintf(stderr, "memory allocation failed\n");
6999                 exit(-1);
7000         }
7001
7002         rec->cache.objectid = key->objectid;
7003         rec->cache.start = key->offset;
7004
7005         rec->generation = btrfs_header_generation(leaf);
7006
7007         rec->objectid = key->objectid;
7008         rec->type = key->type;
7009         rec->offset = key->offset;
7010
7011         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7012         rec->chunk_objecteid =
7013                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7014         rec->chunk_offset =
7015                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7016         rec->length = btrfs_dev_extent_length(leaf, ptr);
7017         rec->cache.size = rec->length;
7018
7019         INIT_LIST_HEAD(&rec->chunk_list);
7020         INIT_LIST_HEAD(&rec->device_list);
7021
7022         return rec;
7023 }
7024
7025 static int
7026 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7027                            struct btrfs_key *key, struct extent_buffer *eb,
7028                            int slot)
7029 {
7030         struct device_extent_record *rec;
7031         int ret;
7032
7033         rec = btrfs_new_device_extent_record(eb, key, slot);
7034         ret = insert_device_extent_record(dev_extent_cache, rec);
7035         if (ret) {
7036                 fprintf(stderr,
7037                         "Device extent[%llu, %llu, %llu] existed.\n",
7038                         rec->objectid, rec->offset, rec->length);
7039                 free(rec);
7040         }
7041
7042         return ret;
7043 }
7044
7045 static int process_extent_item(struct btrfs_root *root,
7046                                struct cache_tree *extent_cache,
7047                                struct extent_buffer *eb, int slot)
7048 {
7049         struct btrfs_extent_item *ei;
7050         struct btrfs_extent_inline_ref *iref;
7051         struct btrfs_extent_data_ref *dref;
7052         struct btrfs_shared_data_ref *sref;
7053         struct btrfs_key key;
7054         struct extent_record tmpl;
7055         unsigned long end;
7056         unsigned long ptr;
7057         int ret;
7058         int type;
7059         u32 item_size = btrfs_item_size_nr(eb, slot);
7060         u64 refs = 0;
7061         u64 offset;
7062         u64 num_bytes;
7063         int metadata = 0;
7064
7065         btrfs_item_key_to_cpu(eb, &key, slot);
7066
7067         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7068                 metadata = 1;
7069                 num_bytes = root->fs_info->nodesize;
7070         } else {
7071                 num_bytes = key.offset;
7072         }
7073
7074         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7075                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7076                       key.objectid, root->fs_info->sectorsize);
7077                 return -EIO;
7078         }
7079         if (item_size < sizeof(*ei)) {
7080 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7081                 struct btrfs_extent_item_v0 *ei0;
7082                 BUG_ON(item_size != sizeof(*ei0));
7083                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7084                 refs = btrfs_extent_refs_v0(eb, ei0);
7085 #else
7086                 BUG();
7087 #endif
7088                 memset(&tmpl, 0, sizeof(tmpl));
7089                 tmpl.start = key.objectid;
7090                 tmpl.nr = num_bytes;
7091                 tmpl.extent_item_refs = refs;
7092                 tmpl.metadata = metadata;
7093                 tmpl.found_rec = 1;
7094                 tmpl.max_size = num_bytes;
7095
7096                 return add_extent_rec(extent_cache, &tmpl);
7097         }
7098
7099         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7100         refs = btrfs_extent_refs(eb, ei);
7101         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7102                 metadata = 1;
7103         else
7104                 metadata = 0;
7105         if (metadata && num_bytes != root->fs_info->nodesize) {
7106                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7107                       num_bytes, root->fs_info->nodesize);
7108                 return -EIO;
7109         }
7110         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7111                 error("ignore invalid data extent, length %llu is not aligned to %u",
7112                       num_bytes, root->fs_info->sectorsize);
7113                 return -EIO;
7114         }
7115
7116         memset(&tmpl, 0, sizeof(tmpl));
7117         tmpl.start = key.objectid;
7118         tmpl.nr = num_bytes;
7119         tmpl.extent_item_refs = refs;
7120         tmpl.metadata = metadata;
7121         tmpl.found_rec = 1;
7122         tmpl.max_size = num_bytes;
7123         add_extent_rec(extent_cache, &tmpl);
7124
7125         ptr = (unsigned long)(ei + 1);
7126         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7127             key.type == BTRFS_EXTENT_ITEM_KEY)
7128                 ptr += sizeof(struct btrfs_tree_block_info);
7129
7130         end = (unsigned long)ei + item_size;
7131         while (ptr < end) {
7132                 iref = (struct btrfs_extent_inline_ref *)ptr;
7133                 type = btrfs_extent_inline_ref_type(eb, iref);
7134                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7135                 switch (type) {
7136                 case BTRFS_TREE_BLOCK_REF_KEY:
7137                         ret = add_tree_backref(extent_cache, key.objectid,
7138                                         0, offset, 0);
7139                         if (ret < 0)
7140                                 error(
7141                         "add_tree_backref failed (extent items tree block): %s",
7142                                       strerror(-ret));
7143                         break;
7144                 case BTRFS_SHARED_BLOCK_REF_KEY:
7145                         ret = add_tree_backref(extent_cache, key.objectid,
7146                                         offset, 0, 0);
7147                         if (ret < 0)
7148                                 error(
7149                         "add_tree_backref failed (extent items shared block): %s",
7150                                       strerror(-ret));
7151                         break;
7152                 case BTRFS_EXTENT_DATA_REF_KEY:
7153                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7154                         add_data_backref(extent_cache, key.objectid, 0,
7155                                         btrfs_extent_data_ref_root(eb, dref),
7156                                         btrfs_extent_data_ref_objectid(eb,
7157                                                                        dref),
7158                                         btrfs_extent_data_ref_offset(eb, dref),
7159                                         btrfs_extent_data_ref_count(eb, dref),
7160                                         0, num_bytes);
7161                         break;
7162                 case BTRFS_SHARED_DATA_REF_KEY:
7163                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7164                         add_data_backref(extent_cache, key.objectid, offset,
7165                                         0, 0, 0,
7166                                         btrfs_shared_data_ref_count(eb, sref),
7167                                         0, num_bytes);
7168                         break;
7169                 default:
7170                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7171                                 key.objectid, key.type, num_bytes);
7172                         goto out;
7173                 }
7174                 ptr += btrfs_extent_inline_ref_size(type);
7175         }
7176         WARN_ON(ptr > end);
7177 out:
7178         return 0;
7179 }
7180
7181 static int check_cache_range(struct btrfs_root *root,
7182                              struct btrfs_block_group_cache *cache,
7183                              u64 offset, u64 bytes)
7184 {
7185         struct btrfs_free_space *entry;
7186         u64 *logical;
7187         u64 bytenr;
7188         int stripe_len;
7189         int i, nr, ret;
7190
7191         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7192                 bytenr = btrfs_sb_offset(i);
7193                 ret = btrfs_rmap_block(root->fs_info,
7194                                        cache->key.objectid, bytenr, 0,
7195                                        &logical, &nr, &stripe_len);
7196                 if (ret)
7197                         return ret;
7198
7199                 while (nr--) {
7200                         if (logical[nr] + stripe_len <= offset)
7201                                 continue;
7202                         if (offset + bytes <= logical[nr])
7203                                 continue;
7204                         if (logical[nr] == offset) {
7205                                 if (stripe_len >= bytes) {
7206                                         free(logical);
7207                                         return 0;
7208                                 }
7209                                 bytes -= stripe_len;
7210                                 offset += stripe_len;
7211                         } else if (logical[nr] < offset) {
7212                                 if (logical[nr] + stripe_len >=
7213                                     offset + bytes) {
7214                                         free(logical);
7215                                         return 0;
7216                                 }
7217                                 bytes = (offset + bytes) -
7218                                         (logical[nr] + stripe_len);
7219                                 offset = logical[nr] + stripe_len;
7220                         } else {
7221                                 /*
7222                                  * Could be tricky, the super may land in the
7223                                  * middle of the area we're checking.  First
7224                                  * check the easiest case, it's at the end.
7225                                  */
7226                                 if (logical[nr] + stripe_len >=
7227                                     bytes + offset) {
7228                                         bytes = logical[nr] - offset;
7229                                         continue;
7230                                 }
7231
7232                                 /* Check the left side */
7233                                 ret = check_cache_range(root, cache,
7234                                                         offset,
7235                                                         logical[nr] - offset);
7236                                 if (ret) {
7237                                         free(logical);
7238                                         return ret;
7239                                 }
7240
7241                                 /* Now we continue with the right side */
7242                                 bytes = (offset + bytes) -
7243                                         (logical[nr] + stripe_len);
7244                                 offset = logical[nr] + stripe_len;
7245                         }
7246                 }
7247
7248                 free(logical);
7249         }
7250
7251         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7252         if (!entry) {
7253                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7254                         offset, offset+bytes);
7255                 return -EINVAL;
7256         }
7257
7258         if (entry->offset != offset) {
7259                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7260                         entry->offset);
7261                 return -EINVAL;
7262         }
7263
7264         if (entry->bytes != bytes) {
7265                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7266                         bytes, entry->bytes, offset);
7267                 return -EINVAL;
7268         }
7269
7270         unlink_free_space(cache->free_space_ctl, entry);
7271         free(entry);
7272         return 0;
7273 }
7274
7275 static int verify_space_cache(struct btrfs_root *root,
7276                               struct btrfs_block_group_cache *cache)
7277 {
7278         struct btrfs_path path;
7279         struct extent_buffer *leaf;
7280         struct btrfs_key key;
7281         u64 last;
7282         int ret = 0;
7283
7284         root = root->fs_info->extent_root;
7285
7286         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7287
7288         btrfs_init_path(&path);
7289         key.objectid = last;
7290         key.offset = 0;
7291         key.type = BTRFS_EXTENT_ITEM_KEY;
7292         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7293         if (ret < 0)
7294                 goto out;
7295         ret = 0;
7296         while (1) {
7297                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7298                         ret = btrfs_next_leaf(root, &path);
7299                         if (ret < 0)
7300                                 goto out;
7301                         if (ret > 0) {
7302                                 ret = 0;
7303                                 break;
7304                         }
7305                 }
7306                 leaf = path.nodes[0];
7307                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7308                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7309                         break;
7310                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7311                     key.type != BTRFS_METADATA_ITEM_KEY) {
7312                         path.slots[0]++;
7313                         continue;
7314                 }
7315
7316                 if (last == key.objectid) {
7317                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7318                                 last = key.objectid + key.offset;
7319                         else
7320                                 last = key.objectid + root->fs_info->nodesize;
7321                         path.slots[0]++;
7322                         continue;
7323                 }
7324
7325                 ret = check_cache_range(root, cache, last,
7326                                         key.objectid - last);
7327                 if (ret)
7328                         break;
7329                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7330                         last = key.objectid + key.offset;
7331                 else
7332                         last = key.objectid + root->fs_info->nodesize;
7333                 path.slots[0]++;
7334         }
7335
7336         if (last < cache->key.objectid + cache->key.offset)
7337                 ret = check_cache_range(root, cache, last,
7338                                         cache->key.objectid +
7339                                         cache->key.offset - last);
7340
7341 out:
7342         btrfs_release_path(&path);
7343
7344         if (!ret &&
7345             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7346                 fprintf(stderr, "There are still entries left in the space "
7347                         "cache\n");
7348                 ret = -EINVAL;
7349         }
7350
7351         return ret;
7352 }
7353
7354 static int check_space_cache(struct btrfs_root *root)
7355 {
7356         struct btrfs_block_group_cache *cache;
7357         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7358         int ret;
7359         int error = 0;
7360
7361         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7362             btrfs_super_generation(root->fs_info->super_copy) !=
7363             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7364                 printf("cache and super generation don't match, space cache "
7365                        "will be invalidated\n");
7366                 return 0;
7367         }
7368
7369         if (ctx.progress_enabled) {
7370                 ctx.tp = TASK_FREE_SPACE;
7371                 task_start(ctx.info);
7372         }
7373
7374         while (1) {
7375                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7376                 if (!cache)
7377                         break;
7378
7379                 start = cache->key.objectid + cache->key.offset;
7380                 if (!cache->free_space_ctl) {
7381                         if (btrfs_init_free_space_ctl(cache,
7382                                                 root->fs_info->sectorsize)) {
7383                                 ret = -ENOMEM;
7384                                 break;
7385                         }
7386                 } else {
7387                         btrfs_remove_free_space_cache(cache);
7388                 }
7389
7390                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7391                         ret = exclude_super_stripes(root, cache);
7392                         if (ret) {
7393                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7394                                         strerror(-ret));
7395                                 error++;
7396                                 continue;
7397                         }
7398                         ret = load_free_space_tree(root->fs_info, cache);
7399                         free_excluded_extents(root, cache);
7400                         if (ret < 0) {
7401                                 fprintf(stderr, "could not load free space tree: %s\n",
7402                                         strerror(-ret));
7403                                 error++;
7404                                 continue;
7405                         }
7406                         error += ret;
7407                 } else {
7408                         ret = load_free_space_cache(root->fs_info, cache);
7409                         if (!ret)
7410                                 continue;
7411                 }
7412
7413                 ret = verify_space_cache(root, cache);
7414                 if (ret) {
7415                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7416                                 cache->key.objectid);
7417                         error++;
7418                 }
7419         }
7420
7421         task_stop(ctx.info);
7422
7423         return error ? -EINVAL : 0;
7424 }
7425
7426 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7427                         u64 num_bytes, unsigned long leaf_offset,
7428                         struct extent_buffer *eb) {
7429
7430         struct btrfs_fs_info *fs_info = root->fs_info;
7431         u64 offset = 0;
7432         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7433         char *data;
7434         unsigned long csum_offset;
7435         u32 csum;
7436         u32 csum_expected;
7437         u64 read_len;
7438         u64 data_checked = 0;
7439         u64 tmp;
7440         int ret = 0;
7441         int mirror;
7442         int num_copies;
7443
7444         if (num_bytes % fs_info->sectorsize)
7445                 return -EINVAL;
7446
7447         data = malloc(num_bytes);
7448         if (!data)
7449                 return -ENOMEM;
7450
7451         while (offset < num_bytes) {
7452                 mirror = 0;
7453 again:
7454                 read_len = num_bytes - offset;
7455                 /* read as much space once a time */
7456                 ret = read_extent_data(fs_info, data + offset,
7457                                 bytenr + offset, &read_len, mirror);
7458                 if (ret)
7459                         goto out;
7460                 data_checked = 0;
7461                 /* verify every 4k data's checksum */
7462                 while (data_checked < read_len) {
7463                         csum = ~(u32)0;
7464                         tmp = offset + data_checked;
7465
7466                         csum = btrfs_csum_data((char *)data + tmp,
7467                                                csum, fs_info->sectorsize);
7468                         btrfs_csum_final(csum, (u8 *)&csum);
7469
7470                         csum_offset = leaf_offset +
7471                                  tmp / fs_info->sectorsize * csum_size;
7472                         read_extent_buffer(eb, (char *)&csum_expected,
7473                                            csum_offset, csum_size);
7474                         /* try another mirror */
7475                         if (csum != csum_expected) {
7476                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7477                                                 mirror, bytenr + tmp,
7478                                                 csum, csum_expected);
7479                                 num_copies = btrfs_num_copies(root->fs_info,
7480                                                 bytenr, num_bytes);
7481                                 if (mirror < num_copies - 1) {
7482                                         mirror += 1;
7483                                         goto again;
7484                                 }
7485                         }
7486                         data_checked += fs_info->sectorsize;
7487                 }
7488                 offset += read_len;
7489         }
7490 out:
7491         free(data);
7492         return ret;
7493 }
7494
7495 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7496                                u64 num_bytes)
7497 {
7498         struct btrfs_path path;
7499         struct extent_buffer *leaf;
7500         struct btrfs_key key;
7501         int ret;
7502
7503         btrfs_init_path(&path);
7504         key.objectid = bytenr;
7505         key.type = BTRFS_EXTENT_ITEM_KEY;
7506         key.offset = (u64)-1;
7507
7508 again:
7509         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7510                                 0, 0);
7511         if (ret < 0) {
7512                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7513                 btrfs_release_path(&path);
7514                 return ret;
7515         } else if (ret) {
7516                 if (path.slots[0] > 0) {
7517                         path.slots[0]--;
7518                 } else {
7519                         ret = btrfs_prev_leaf(root, &path);
7520                         if (ret < 0) {
7521                                 goto out;
7522                         } else if (ret > 0) {
7523                                 ret = 0;
7524                                 goto out;
7525                         }
7526                 }
7527         }
7528
7529         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7530
7531         /*
7532          * Block group items come before extent items if they have the same
7533          * bytenr, so walk back one more just in case.  Dear future traveller,
7534          * first congrats on mastering time travel.  Now if it's not too much
7535          * trouble could you go back to 2006 and tell Chris to make the
7536          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7537          * EXTENT_ITEM_KEY please?
7538          */
7539         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7540                 if (path.slots[0] > 0) {
7541                         path.slots[0]--;
7542                 } else {
7543                         ret = btrfs_prev_leaf(root, &path);
7544                         if (ret < 0) {
7545                                 goto out;
7546                         } else if (ret > 0) {
7547                                 ret = 0;
7548                                 goto out;
7549                         }
7550                 }
7551                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7552         }
7553
7554         while (num_bytes) {
7555                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7556                         ret = btrfs_next_leaf(root, &path);
7557                         if (ret < 0) {
7558                                 fprintf(stderr, "Error going to next leaf "
7559                                         "%d\n", ret);
7560                                 btrfs_release_path(&path);
7561                                 return ret;
7562                         } else if (ret) {
7563                                 break;
7564                         }
7565                 }
7566                 leaf = path.nodes[0];
7567                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7568                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7569                         path.slots[0]++;
7570                         continue;
7571                 }
7572                 if (key.objectid + key.offset < bytenr) {
7573                         path.slots[0]++;
7574                         continue;
7575                 }
7576                 if (key.objectid > bytenr + num_bytes)
7577                         break;
7578
7579                 if (key.objectid == bytenr) {
7580                         if (key.offset >= num_bytes) {
7581                                 num_bytes = 0;
7582                                 break;
7583                         }
7584                         num_bytes -= key.offset;
7585                         bytenr += key.offset;
7586                 } else if (key.objectid < bytenr) {
7587                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7588                                 num_bytes = 0;
7589                                 break;
7590                         }
7591                         num_bytes = (bytenr + num_bytes) -
7592                                 (key.objectid + key.offset);
7593                         bytenr = key.objectid + key.offset;
7594                 } else {
7595                         if (key.objectid + key.offset < bytenr + num_bytes) {
7596                                 u64 new_start = key.objectid + key.offset;
7597                                 u64 new_bytes = bytenr + num_bytes - new_start;
7598
7599                                 /*
7600                                  * Weird case, the extent is in the middle of
7601                                  * our range, we'll have to search one side
7602                                  * and then the other.  Not sure if this happens
7603                                  * in real life, but no harm in coding it up
7604                                  * anyway just in case.
7605                                  */
7606                                 btrfs_release_path(&path);
7607                                 ret = check_extent_exists(root, new_start,
7608                                                           new_bytes);
7609                                 if (ret) {
7610                                         fprintf(stderr, "Right section didn't "
7611                                                 "have a record\n");
7612                                         break;
7613                                 }
7614                                 num_bytes = key.objectid - bytenr;
7615                                 goto again;
7616                         }
7617                         num_bytes = key.objectid - bytenr;
7618                 }
7619                 path.slots[0]++;
7620         }
7621         ret = 0;
7622
7623 out:
7624         if (num_bytes && !ret) {
7625                 fprintf(stderr, "There are no extents for csum range "
7626                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7627                 ret = 1;
7628         }
7629
7630         btrfs_release_path(&path);
7631         return ret;
7632 }
7633
7634 static int check_csums(struct btrfs_root *root)
7635 {
7636         struct btrfs_path path;
7637         struct extent_buffer *leaf;
7638         struct btrfs_key key;
7639         u64 offset = 0, num_bytes = 0;
7640         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7641         int errors = 0;
7642         int ret;
7643         u64 data_len;
7644         unsigned long leaf_offset;
7645
7646         root = root->fs_info->csum_root;
7647         if (!extent_buffer_uptodate(root->node)) {
7648                 fprintf(stderr, "No valid csum tree found\n");
7649                 return -ENOENT;
7650         }
7651
7652         btrfs_init_path(&path);
7653         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7654         key.type = BTRFS_EXTENT_CSUM_KEY;
7655         key.offset = 0;
7656         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7657         if (ret < 0) {
7658                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7659                 btrfs_release_path(&path);
7660                 return ret;
7661         }
7662
7663         if (ret > 0 && path.slots[0])
7664                 path.slots[0]--;
7665         ret = 0;
7666
7667         while (1) {
7668                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7669                         ret = btrfs_next_leaf(root, &path);
7670                         if (ret < 0) {
7671                                 fprintf(stderr, "Error going to next leaf "
7672                                         "%d\n", ret);
7673                                 break;
7674                         }
7675                         if (ret)
7676                                 break;
7677                 }
7678                 leaf = path.nodes[0];
7679
7680                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7681                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7682                         path.slots[0]++;
7683                         continue;
7684                 }
7685
7686                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7687                               csum_size) * root->fs_info->sectorsize;
7688                 if (!check_data_csum)
7689                         goto skip_csum_check;
7690                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7691                 ret = check_extent_csums(root, key.offset, data_len,
7692                                          leaf_offset, leaf);
7693                 if (ret)
7694                         break;
7695 skip_csum_check:
7696                 if (!num_bytes) {
7697                         offset = key.offset;
7698                 } else if (key.offset != offset + num_bytes) {
7699                         ret = check_extent_exists(root, offset, num_bytes);
7700                         if (ret) {
7701                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7702                                         "there is no extent record\n",
7703                                         offset, offset+num_bytes);
7704                                 errors++;
7705                         }
7706                         offset = key.offset;
7707                         num_bytes = 0;
7708                 }
7709                 num_bytes += data_len;
7710                 path.slots[0]++;
7711         }
7712
7713         btrfs_release_path(&path);
7714         return errors;
7715 }
7716
7717 static int is_dropped_key(struct btrfs_key *key,
7718                           struct btrfs_key *drop_key) {
7719         if (key->objectid < drop_key->objectid)
7720                 return 1;
7721         else if (key->objectid == drop_key->objectid) {
7722                 if (key->type < drop_key->type)
7723                         return 1;
7724                 else if (key->type == drop_key->type) {
7725                         if (key->offset < drop_key->offset)
7726                                 return 1;
7727                 }
7728         }
7729         return 0;
7730 }
7731
7732 /*
7733  * Here are the rules for FULL_BACKREF.
7734  *
7735  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7736  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7737  *      FULL_BACKREF set.
7738  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7739  *    if it happened after the relocation occurred since we'll have dropped the
7740  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7741  *    have no real way to know for sure.
7742  *
7743  * We process the blocks one root at a time, and we start from the lowest root
7744  * objectid and go to the highest.  So we can just lookup the owner backref for
7745  * the record and if we don't find it then we know it doesn't exist and we have
7746  * a FULL BACKREF.
7747  *
7748  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7749  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7750  * be set or not and then we can check later once we've gathered all the refs.
7751  */
7752 static int calc_extent_flag(struct cache_tree *extent_cache,
7753                            struct extent_buffer *buf,
7754                            struct root_item_record *ri,
7755                            u64 *flags)
7756 {
7757         struct extent_record *rec;
7758         struct cache_extent *cache;
7759         struct tree_backref *tback;
7760         u64 owner = 0;
7761
7762         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7763         /* we have added this extent before */
7764         if (!cache)
7765                 return -ENOENT;
7766
7767         rec = container_of(cache, struct extent_record, cache);
7768
7769         /*
7770          * Except file/reloc tree, we can not have
7771          * FULL BACKREF MODE
7772          */
7773         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7774                 goto normal;
7775         /*
7776          * root node
7777          */
7778         if (buf->start == ri->bytenr)
7779                 goto normal;
7780
7781         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7782                 goto full_backref;
7783
7784         owner = btrfs_header_owner(buf);
7785         if (owner == ri->objectid)
7786                 goto normal;
7787
7788         tback = find_tree_backref(rec, 0, owner);
7789         if (!tback)
7790                 goto full_backref;
7791 normal:
7792         *flags = 0;
7793         if (rec->flag_block_full_backref != FLAG_UNSET &&
7794             rec->flag_block_full_backref != 0)
7795                 rec->bad_full_backref = 1;
7796         return 0;
7797 full_backref:
7798         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7799         if (rec->flag_block_full_backref != FLAG_UNSET &&
7800             rec->flag_block_full_backref != 1)
7801                 rec->bad_full_backref = 1;
7802         return 0;
7803 }
7804
7805 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7806 {
7807         fprintf(stderr, "Invalid key type(");
7808         print_key_type(stderr, 0, key_type);
7809         fprintf(stderr, ") found in root(");
7810         print_objectid(stderr, rootid, 0);
7811         fprintf(stderr, ")\n");
7812 }
7813
7814 /*
7815  * Check if the key is valid with its extent buffer.
7816  *
7817  * This is a early check in case invalid key exists in a extent buffer
7818  * This is not comprehensive yet, but should prevent wrong key/item passed
7819  * further
7820  */
7821 static int check_type_with_root(u64 rootid, u8 key_type)
7822 {
7823         switch (key_type) {
7824         /* Only valid in chunk tree */
7825         case BTRFS_DEV_ITEM_KEY:
7826         case BTRFS_CHUNK_ITEM_KEY:
7827                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7828                         goto err;
7829                 break;
7830         /* valid in csum and log tree */
7831         case BTRFS_CSUM_TREE_OBJECTID:
7832                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7833                       is_fstree(rootid)))
7834                         goto err;
7835                 break;
7836         case BTRFS_EXTENT_ITEM_KEY:
7837         case BTRFS_METADATA_ITEM_KEY:
7838         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7839                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7840                         goto err;
7841                 break;
7842         case BTRFS_ROOT_ITEM_KEY:
7843                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7844                         goto err;
7845                 break;
7846         case BTRFS_DEV_EXTENT_KEY:
7847                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7848                         goto err;
7849                 break;
7850         }
7851         return 0;
7852 err:
7853         report_mismatch_key_root(key_type, rootid);
7854         return -EINVAL;
7855 }
7856
7857 static int run_next_block(struct btrfs_root *root,
7858                           struct block_info *bits,
7859                           int bits_nr,
7860                           u64 *last,
7861                           struct cache_tree *pending,
7862                           struct cache_tree *seen,
7863                           struct cache_tree *reada,
7864                           struct cache_tree *nodes,
7865                           struct cache_tree *extent_cache,
7866                           struct cache_tree *chunk_cache,
7867                           struct rb_root *dev_cache,
7868                           struct block_group_tree *block_group_cache,
7869                           struct device_extent_tree *dev_extent_cache,
7870                           struct root_item_record *ri)
7871 {
7872         struct btrfs_fs_info *fs_info = root->fs_info;
7873         struct extent_buffer *buf;
7874         struct extent_record *rec = NULL;
7875         u64 bytenr;
7876         u32 size;
7877         u64 parent;
7878         u64 owner;
7879         u64 flags;
7880         u64 ptr;
7881         u64 gen = 0;
7882         int ret = 0;
7883         int i;
7884         int nritems;
7885         struct btrfs_key key;
7886         struct cache_extent *cache;
7887         int reada_bits;
7888
7889         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7890                                     bits_nr, &reada_bits);
7891         if (nritems == 0)
7892                 return 1;
7893
7894         if (!reada_bits) {
7895                 for(i = 0; i < nritems; i++) {
7896                         ret = add_cache_extent(reada, bits[i].start,
7897                                                bits[i].size);
7898                         if (ret == -EEXIST)
7899                                 continue;
7900
7901                         /* fixme, get the parent transid */
7902                         readahead_tree_block(fs_info, bits[i].start, 0);
7903                 }
7904         }
7905         *last = bits[0].start;
7906         bytenr = bits[0].start;
7907         size = bits[0].size;
7908
7909         cache = lookup_cache_extent(pending, bytenr, size);
7910         if (cache) {
7911                 remove_cache_extent(pending, cache);
7912                 free(cache);
7913         }
7914         cache = lookup_cache_extent(reada, bytenr, size);
7915         if (cache) {
7916                 remove_cache_extent(reada, cache);
7917                 free(cache);
7918         }
7919         cache = lookup_cache_extent(nodes, bytenr, size);
7920         if (cache) {
7921                 remove_cache_extent(nodes, cache);
7922                 free(cache);
7923         }
7924         cache = lookup_cache_extent(extent_cache, bytenr, size);
7925         if (cache) {
7926                 rec = container_of(cache, struct extent_record, cache);
7927                 gen = rec->parent_generation;
7928         }
7929
7930         /* fixme, get the real parent transid */
7931         buf = read_tree_block(root->fs_info, bytenr, gen);
7932         if (!extent_buffer_uptodate(buf)) {
7933                 record_bad_block_io(root->fs_info,
7934                                     extent_cache, bytenr, size);
7935                 goto out;
7936         }
7937
7938         nritems = btrfs_header_nritems(buf);
7939
7940         flags = 0;
7941         if (!init_extent_tree) {
7942                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7943                                        btrfs_header_level(buf), 1, NULL,
7944                                        &flags);
7945                 if (ret < 0) {
7946                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7947                         if (ret < 0) {
7948                                 fprintf(stderr, "Couldn't calc extent flags\n");
7949                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7950                         }
7951                 }
7952         } else {
7953                 flags = 0;
7954                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7955                 if (ret < 0) {
7956                         fprintf(stderr, "Couldn't calc extent flags\n");
7957                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7958                 }
7959         }
7960
7961         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7962                 if (ri != NULL &&
7963                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7964                     ri->objectid == btrfs_header_owner(buf)) {
7965                         /*
7966                          * Ok we got to this block from it's original owner and
7967                          * we have FULL_BACKREF set.  Relocation can leave
7968                          * converted blocks over so this is altogether possible,
7969                          * however it's not possible if the generation > the
7970                          * last snapshot, so check for this case.
7971                          */
7972                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7973                             btrfs_header_generation(buf) > ri->last_snapshot) {
7974                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7975                                 rec->bad_full_backref = 1;
7976                         }
7977                 }
7978         } else {
7979                 if (ri != NULL &&
7980                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7981                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7982                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7983                         rec->bad_full_backref = 1;
7984                 }
7985         }
7986
7987         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7988                 rec->flag_block_full_backref = 1;
7989                 parent = bytenr;
7990                 owner = 0;
7991         } else {
7992                 rec->flag_block_full_backref = 0;
7993                 parent = 0;
7994                 owner = btrfs_header_owner(buf);
7995         }
7996
7997         ret = check_block(root, extent_cache, buf, flags);
7998         if (ret)
7999                 goto out;
8000
8001         if (btrfs_is_leaf(buf)) {
8002                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8003                 for (i = 0; i < nritems; i++) {
8004                         struct btrfs_file_extent_item *fi;
8005                         btrfs_item_key_to_cpu(buf, &key, i);
8006                         /*
8007                          * Check key type against the leaf owner.
8008                          * Could filter quite a lot of early error if
8009                          * owner is correct
8010                          */
8011                         if (check_type_with_root(btrfs_header_owner(buf),
8012                                                  key.type)) {
8013                                 fprintf(stderr, "ignoring invalid key\n");
8014                                 continue;
8015                         }
8016                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8017                                 process_extent_item(root, extent_cache, buf,
8018                                                     i);
8019                                 continue;
8020                         }
8021                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8022                                 process_extent_item(root, extent_cache, buf,
8023                                                     i);
8024                                 continue;
8025                         }
8026                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8027                                 total_csum_bytes +=
8028                                         btrfs_item_size_nr(buf, i);
8029                                 continue;
8030                         }
8031                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8032                                 process_chunk_item(chunk_cache, &key, buf, i);
8033                                 continue;
8034                         }
8035                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8036                                 process_device_item(dev_cache, &key, buf, i);
8037                                 continue;
8038                         }
8039                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8040                                 process_block_group_item(block_group_cache,
8041                                         &key, buf, i);
8042                                 continue;
8043                         }
8044                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8045                                 process_device_extent_item(dev_extent_cache,
8046                                         &key, buf, i);
8047                                 continue;
8048
8049                         }
8050                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8051 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8052                                 process_extent_ref_v0(extent_cache, buf, i);
8053 #else
8054                                 BUG();
8055 #endif
8056                                 continue;
8057                         }
8058
8059                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8060                                 ret = add_tree_backref(extent_cache,
8061                                                 key.objectid, 0, key.offset, 0);
8062                                 if (ret < 0)
8063                                         error(
8064                                 "add_tree_backref failed (leaf tree block): %s",
8065                                               strerror(-ret));
8066                                 continue;
8067                         }
8068                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8069                                 ret = add_tree_backref(extent_cache,
8070                                                 key.objectid, key.offset, 0, 0);
8071                                 if (ret < 0)
8072                                         error(
8073                                 "add_tree_backref failed (leaf shared block): %s",
8074                                               strerror(-ret));
8075                                 continue;
8076                         }
8077                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8078                                 struct btrfs_extent_data_ref *ref;
8079                                 ref = btrfs_item_ptr(buf, i,
8080                                                 struct btrfs_extent_data_ref);
8081                                 add_data_backref(extent_cache,
8082                                         key.objectid, 0,
8083                                         btrfs_extent_data_ref_root(buf, ref),
8084                                         btrfs_extent_data_ref_objectid(buf,
8085                                                                        ref),
8086                                         btrfs_extent_data_ref_offset(buf, ref),
8087                                         btrfs_extent_data_ref_count(buf, ref),
8088                                         0, root->fs_info->sectorsize);
8089                                 continue;
8090                         }
8091                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8092                                 struct btrfs_shared_data_ref *ref;
8093                                 ref = btrfs_item_ptr(buf, i,
8094                                                 struct btrfs_shared_data_ref);
8095                                 add_data_backref(extent_cache,
8096                                         key.objectid, key.offset, 0, 0, 0,
8097                                         btrfs_shared_data_ref_count(buf, ref),
8098                                         0, root->fs_info->sectorsize);
8099                                 continue;
8100                         }
8101                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8102                                 struct bad_item *bad;
8103
8104                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8105                                         continue;
8106                                 if (!owner)
8107                                         continue;
8108                                 bad = malloc(sizeof(struct bad_item));
8109                                 if (!bad)
8110                                         continue;
8111                                 INIT_LIST_HEAD(&bad->list);
8112                                 memcpy(&bad->key, &key,
8113                                        sizeof(struct btrfs_key));
8114                                 bad->root_id = owner;
8115                                 list_add_tail(&bad->list, &delete_items);
8116                                 continue;
8117                         }
8118                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8119                                 continue;
8120                         fi = btrfs_item_ptr(buf, i,
8121                                             struct btrfs_file_extent_item);
8122                         if (btrfs_file_extent_type(buf, fi) ==
8123                             BTRFS_FILE_EXTENT_INLINE)
8124                                 continue;
8125                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8126                                 continue;
8127
8128                         data_bytes_allocated +=
8129                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8130                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8131                                 abort();
8132                         }
8133                         data_bytes_referenced +=
8134                                 btrfs_file_extent_num_bytes(buf, fi);
8135                         add_data_backref(extent_cache,
8136                                 btrfs_file_extent_disk_bytenr(buf, fi),
8137                                 parent, owner, key.objectid, key.offset -
8138                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8139                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8140                 }
8141         } else {
8142                 int level;
8143                 struct btrfs_key first_key;
8144
8145                 first_key.objectid = 0;
8146
8147                 if (nritems > 0)
8148                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8149                 level = btrfs_header_level(buf);
8150                 for (i = 0; i < nritems; i++) {
8151                         struct extent_record tmpl;
8152
8153                         ptr = btrfs_node_blockptr(buf, i);
8154                         size = root->fs_info->nodesize;
8155                         btrfs_node_key_to_cpu(buf, &key, i);
8156                         if (ri != NULL) {
8157                                 if ((level == ri->drop_level)
8158                                     && is_dropped_key(&key, &ri->drop_key)) {
8159                                         continue;
8160                                 }
8161                         }
8162
8163                         memset(&tmpl, 0, sizeof(tmpl));
8164                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8165                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8166                         tmpl.start = ptr;
8167                         tmpl.nr = size;
8168                         tmpl.refs = 1;
8169                         tmpl.metadata = 1;
8170                         tmpl.max_size = size;
8171                         ret = add_extent_rec(extent_cache, &tmpl);
8172                         if (ret < 0)
8173                                 goto out;
8174
8175                         ret = add_tree_backref(extent_cache, ptr, parent,
8176                                         owner, 1);
8177                         if (ret < 0) {
8178                                 error(
8179                                 "add_tree_backref failed (non-leaf block): %s",
8180                                       strerror(-ret));
8181                                 continue;
8182                         }
8183
8184                         if (level > 1) {
8185                                 add_pending(nodes, seen, ptr, size);
8186                         } else {
8187                                 add_pending(pending, seen, ptr, size);
8188                         }
8189                 }
8190                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8191                                       nritems) * sizeof(struct btrfs_key_ptr);
8192         }
8193         total_btree_bytes += buf->len;
8194         if (fs_root_objectid(btrfs_header_owner(buf)))
8195                 total_fs_tree_bytes += buf->len;
8196         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8197                 total_extent_tree_bytes += buf->len;
8198 out:
8199         free_extent_buffer(buf);
8200         return ret;
8201 }
8202
8203 static int add_root_to_pending(struct extent_buffer *buf,
8204                                struct cache_tree *extent_cache,
8205                                struct cache_tree *pending,
8206                                struct cache_tree *seen,
8207                                struct cache_tree *nodes,
8208                                u64 objectid)
8209 {
8210         struct extent_record tmpl;
8211         int ret;
8212
8213         if (btrfs_header_level(buf) > 0)
8214                 add_pending(nodes, seen, buf->start, buf->len);
8215         else
8216                 add_pending(pending, seen, buf->start, buf->len);
8217
8218         memset(&tmpl, 0, sizeof(tmpl));
8219         tmpl.start = buf->start;
8220         tmpl.nr = buf->len;
8221         tmpl.is_root = 1;
8222         tmpl.refs = 1;
8223         tmpl.metadata = 1;
8224         tmpl.max_size = buf->len;
8225         add_extent_rec(extent_cache, &tmpl);
8226
8227         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8228             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8229                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8230                                 0, 1);
8231         else
8232                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8233                                 1);
8234         return ret;
8235 }
8236
8237 /* as we fix the tree, we might be deleting blocks that
8238  * we're tracking for repair.  This hook makes sure we
8239  * remove any backrefs for blocks as we are fixing them.
8240  */
8241 static int free_extent_hook(struct btrfs_trans_handle *trans,
8242                             struct btrfs_root *root,
8243                             u64 bytenr, u64 num_bytes, u64 parent,
8244                             u64 root_objectid, u64 owner, u64 offset,
8245                             int refs_to_drop)
8246 {
8247         struct extent_record *rec;
8248         struct cache_extent *cache;
8249         int is_data;
8250         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8251
8252         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8253         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8254         if (!cache)
8255                 return 0;
8256
8257         rec = container_of(cache, struct extent_record, cache);
8258         if (is_data) {
8259                 struct data_backref *back;
8260                 back = find_data_backref(rec, parent, root_objectid, owner,
8261                                          offset, 1, bytenr, num_bytes);
8262                 if (!back)
8263                         goto out;
8264                 if (back->node.found_ref) {
8265                         back->found_ref -= refs_to_drop;
8266                         if (rec->refs)
8267                                 rec->refs -= refs_to_drop;
8268                 }
8269                 if (back->node.found_extent_tree) {
8270                         back->num_refs -= refs_to_drop;
8271                         if (rec->extent_item_refs)
8272                                 rec->extent_item_refs -= refs_to_drop;
8273                 }
8274                 if (back->found_ref == 0)
8275                         back->node.found_ref = 0;
8276                 if (back->num_refs == 0)
8277                         back->node.found_extent_tree = 0;
8278
8279                 if (!back->node.found_extent_tree && back->node.found_ref) {
8280                         rb_erase(&back->node.node, &rec->backref_tree);
8281                         free(back);
8282                 }
8283         } else {
8284                 struct tree_backref *back;
8285                 back = find_tree_backref(rec, parent, root_objectid);
8286                 if (!back)
8287                         goto out;
8288                 if (back->node.found_ref) {
8289                         if (rec->refs)
8290                                 rec->refs--;
8291                         back->node.found_ref = 0;
8292                 }
8293                 if (back->node.found_extent_tree) {
8294                         if (rec->extent_item_refs)
8295                                 rec->extent_item_refs--;
8296                         back->node.found_extent_tree = 0;
8297                 }
8298                 if (!back->node.found_extent_tree && back->node.found_ref) {
8299                         rb_erase(&back->node.node, &rec->backref_tree);
8300                         free(back);
8301                 }
8302         }
8303         maybe_free_extent_rec(extent_cache, rec);
8304 out:
8305         return 0;
8306 }
8307
8308 static int delete_extent_records(struct btrfs_trans_handle *trans,
8309                                  struct btrfs_root *root,
8310                                  struct btrfs_path *path,
8311                                  u64 bytenr)
8312 {
8313         struct btrfs_key key;
8314         struct btrfs_key found_key;
8315         struct extent_buffer *leaf;
8316         int ret;
8317         int slot;
8318
8319
8320         key.objectid = bytenr;
8321         key.type = (u8)-1;
8322         key.offset = (u64)-1;
8323
8324         while(1) {
8325                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8326                                         &key, path, 0, 1);
8327                 if (ret < 0)
8328                         break;
8329
8330                 if (ret > 0) {
8331                         ret = 0;
8332                         if (path->slots[0] == 0)
8333                                 break;
8334                         path->slots[0]--;
8335                 }
8336                 ret = 0;
8337
8338                 leaf = path->nodes[0];
8339                 slot = path->slots[0];
8340
8341                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8342                 if (found_key.objectid != bytenr)
8343                         break;
8344
8345                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8346                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8347                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8348                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8349                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8350                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8351                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8352                         btrfs_release_path(path);
8353                         if (found_key.type == 0) {
8354                                 if (found_key.offset == 0)
8355                                         break;
8356                                 key.offset = found_key.offset - 1;
8357                                 key.type = found_key.type;
8358                         }
8359                         key.type = found_key.type - 1;
8360                         key.offset = (u64)-1;
8361                         continue;
8362                 }
8363
8364                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8365                         found_key.objectid, found_key.type, found_key.offset);
8366
8367                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8368                 if (ret)
8369                         break;
8370                 btrfs_release_path(path);
8371
8372                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8373                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8374                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8375                                 found_key.offset : root->fs_info->nodesize;
8376
8377                         ret = btrfs_update_block_group(trans, root, bytenr,
8378                                                        bytes, 0, 0);
8379                         if (ret)
8380                                 break;
8381                 }
8382         }
8383
8384         btrfs_release_path(path);
8385         return ret;
8386 }
8387
8388 /*
8389  * for a single backref, this will allocate a new extent
8390  * and add the backref to it.
8391  */
8392 static int record_extent(struct btrfs_trans_handle *trans,
8393                          struct btrfs_fs_info *info,
8394                          struct btrfs_path *path,
8395                          struct extent_record *rec,
8396                          struct extent_backref *back,
8397                          int allocated, u64 flags)
8398 {
8399         int ret = 0;
8400         struct btrfs_root *extent_root = info->extent_root;
8401         struct extent_buffer *leaf;
8402         struct btrfs_key ins_key;
8403         struct btrfs_extent_item *ei;
8404         struct data_backref *dback;
8405         struct btrfs_tree_block_info *bi;
8406
8407         if (!back->is_data)
8408                 rec->max_size = max_t(u64, rec->max_size,
8409                                     info->nodesize);
8410
8411         if (!allocated) {
8412                 u32 item_size = sizeof(*ei);
8413
8414                 if (!back->is_data)
8415                         item_size += sizeof(*bi);
8416
8417                 ins_key.objectid = rec->start;
8418                 ins_key.offset = rec->max_size;
8419                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8420
8421                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8422                                         &ins_key, item_size);
8423                 if (ret)
8424                         goto fail;
8425
8426                 leaf = path->nodes[0];
8427                 ei = btrfs_item_ptr(leaf, path->slots[0],
8428                                     struct btrfs_extent_item);
8429
8430                 btrfs_set_extent_refs(leaf, ei, 0);
8431                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8432
8433                 if (back->is_data) {
8434                         btrfs_set_extent_flags(leaf, ei,
8435                                                BTRFS_EXTENT_FLAG_DATA);
8436                 } else {
8437                         struct btrfs_disk_key copy_key;;
8438
8439                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8440                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8441                                              sizeof(*bi));
8442
8443                         btrfs_set_disk_key_objectid(&copy_key,
8444                                                     rec->info_objectid);
8445                         btrfs_set_disk_key_type(&copy_key, 0);
8446                         btrfs_set_disk_key_offset(&copy_key, 0);
8447
8448                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8449                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8450
8451                         btrfs_set_extent_flags(leaf, ei,
8452                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8453                 }
8454
8455                 btrfs_mark_buffer_dirty(leaf);
8456                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8457                                                rec->max_size, 1, 0);
8458                 if (ret)
8459                         goto fail;
8460                 btrfs_release_path(path);
8461         }
8462
8463         if (back->is_data) {
8464                 u64 parent;
8465                 int i;
8466
8467                 dback = to_data_backref(back);
8468                 if (back->full_backref)
8469                         parent = dback->parent;
8470                 else
8471                         parent = 0;
8472
8473                 for (i = 0; i < dback->found_ref; i++) {
8474                         /* if parent != 0, we're doing a full backref
8475                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8476                          * just makes the backref allocator create a data
8477                          * backref
8478                          */
8479                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8480                                                    rec->start, rec->max_size,
8481                                                    parent,
8482                                                    dback->root,
8483                                                    parent ?
8484                                                    BTRFS_FIRST_FREE_OBJECTID :
8485                                                    dback->owner,
8486                                                    dback->offset);
8487                         if (ret)
8488                                 break;
8489                 }
8490                 fprintf(stderr, "adding new data backref"
8491                                 " on %llu %s %llu owner %llu"
8492                                 " offset %llu found %d\n",
8493                                 (unsigned long long)rec->start,
8494                                 back->full_backref ?
8495                                 "parent" : "root",
8496                                 back->full_backref ?
8497                                 (unsigned long long)parent :
8498                                 (unsigned long long)dback->root,
8499                                 (unsigned long long)dback->owner,
8500                                 (unsigned long long)dback->offset,
8501                                 dback->found_ref);
8502         } else {
8503                 u64 parent;
8504                 struct tree_backref *tback;
8505
8506                 tback = to_tree_backref(back);
8507                 if (back->full_backref)
8508                         parent = tback->parent;
8509                 else
8510                         parent = 0;
8511
8512                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8513                                            rec->start, rec->max_size,
8514                                            parent, tback->root, 0, 0);
8515                 fprintf(stderr, "adding new tree backref on "
8516                         "start %llu len %llu parent %llu root %llu\n",
8517                         rec->start, rec->max_size, parent, tback->root);
8518         }
8519 fail:
8520         btrfs_release_path(path);
8521         return ret;
8522 }
8523
8524 static struct extent_entry *find_entry(struct list_head *entries,
8525                                        u64 bytenr, u64 bytes)
8526 {
8527         struct extent_entry *entry = NULL;
8528
8529         list_for_each_entry(entry, entries, list) {
8530                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8531                         return entry;
8532         }
8533
8534         return NULL;
8535 }
8536
8537 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8538 {
8539         struct extent_entry *entry, *best = NULL, *prev = NULL;
8540
8541         list_for_each_entry(entry, entries, list) {
8542                 /*
8543                  * If there are as many broken entries as entries then we know
8544                  * not to trust this particular entry.
8545                  */
8546                 if (entry->broken == entry->count)
8547                         continue;
8548
8549                 /*
8550                  * Special case, when there are only two entries and 'best' is
8551                  * the first one
8552                  */
8553                 if (!prev) {
8554                         best = entry;
8555                         prev = entry;
8556                         continue;
8557                 }
8558
8559                 /*
8560                  * If our current entry == best then we can't be sure our best
8561                  * is really the best, so we need to keep searching.
8562                  */
8563                 if (best && best->count == entry->count) {
8564                         prev = entry;
8565                         best = NULL;
8566                         continue;
8567                 }
8568
8569                 /* Prev == entry, not good enough, have to keep searching */
8570                 if (!prev->broken && prev->count == entry->count)
8571                         continue;
8572
8573                 if (!best)
8574                         best = (prev->count > entry->count) ? prev : entry;
8575                 else if (best->count < entry->count)
8576                         best = entry;
8577                 prev = entry;
8578         }
8579
8580         return best;
8581 }
8582
8583 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8584                       struct data_backref *dback, struct extent_entry *entry)
8585 {
8586         struct btrfs_trans_handle *trans;
8587         struct btrfs_root *root;
8588         struct btrfs_file_extent_item *fi;
8589         struct extent_buffer *leaf;
8590         struct btrfs_key key;
8591         u64 bytenr, bytes;
8592         int ret, err;
8593
8594         key.objectid = dback->root;
8595         key.type = BTRFS_ROOT_ITEM_KEY;
8596         key.offset = (u64)-1;
8597         root = btrfs_read_fs_root(info, &key);
8598         if (IS_ERR(root)) {
8599                 fprintf(stderr, "Couldn't find root for our ref\n");
8600                 return -EINVAL;
8601         }
8602
8603         /*
8604          * The backref points to the original offset of the extent if it was
8605          * split, so we need to search down to the offset we have and then walk
8606          * forward until we find the backref we're looking for.
8607          */
8608         key.objectid = dback->owner;
8609         key.type = BTRFS_EXTENT_DATA_KEY;
8610         key.offset = dback->offset;
8611         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8612         if (ret < 0) {
8613                 fprintf(stderr, "Error looking up ref %d\n", ret);
8614                 return ret;
8615         }
8616
8617         while (1) {
8618                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8619                         ret = btrfs_next_leaf(root, path);
8620                         if (ret) {
8621                                 fprintf(stderr, "Couldn't find our ref, next\n");
8622                                 return -EINVAL;
8623                         }
8624                 }
8625                 leaf = path->nodes[0];
8626                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8627                 if (key.objectid != dback->owner ||
8628                     key.type != BTRFS_EXTENT_DATA_KEY) {
8629                         fprintf(stderr, "Couldn't find our ref, search\n");
8630                         return -EINVAL;
8631                 }
8632                 fi = btrfs_item_ptr(leaf, path->slots[0],
8633                                     struct btrfs_file_extent_item);
8634                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8635                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8636
8637                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8638                         break;
8639                 path->slots[0]++;
8640         }
8641
8642         btrfs_release_path(path);
8643
8644         trans = btrfs_start_transaction(root, 1);
8645         if (IS_ERR(trans))
8646                 return PTR_ERR(trans);
8647
8648         /*
8649          * Ok we have the key of the file extent we want to fix, now we can cow
8650          * down to the thing and fix it.
8651          */
8652         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8653         if (ret < 0) {
8654                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8655                         key.objectid, key.type, key.offset, ret);
8656                 goto out;
8657         }
8658         if (ret > 0) {
8659                 fprintf(stderr, "Well that's odd, we just found this key "
8660                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8661                         key.offset);
8662                 ret = -EINVAL;
8663                 goto out;
8664         }
8665         leaf = path->nodes[0];
8666         fi = btrfs_item_ptr(leaf, path->slots[0],
8667                             struct btrfs_file_extent_item);
8668
8669         if (btrfs_file_extent_compression(leaf, fi) &&
8670             dback->disk_bytenr != entry->bytenr) {
8671                 fprintf(stderr, "Ref doesn't match the record start and is "
8672                         "compressed, please take a btrfs-image of this file "
8673                         "system and send it to a btrfs developer so they can "
8674                         "complete this functionality for bytenr %Lu\n",
8675                         dback->disk_bytenr);
8676                 ret = -EINVAL;
8677                 goto out;
8678         }
8679
8680         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8681                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8682         } else if (dback->disk_bytenr > entry->bytenr) {
8683                 u64 off_diff, offset;
8684
8685                 off_diff = dback->disk_bytenr - entry->bytenr;
8686                 offset = btrfs_file_extent_offset(leaf, fi);
8687                 if (dback->disk_bytenr + offset +
8688                     btrfs_file_extent_num_bytes(leaf, fi) >
8689                     entry->bytenr + entry->bytes) {
8690                         fprintf(stderr, "Ref is past the entry end, please "
8691                                 "take a btrfs-image of this file system and "
8692                                 "send it to a btrfs developer, ref %Lu\n",
8693                                 dback->disk_bytenr);
8694                         ret = -EINVAL;
8695                         goto out;
8696                 }
8697                 offset += off_diff;
8698                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8699                 btrfs_set_file_extent_offset(leaf, fi, offset);
8700         } else if (dback->disk_bytenr < entry->bytenr) {
8701                 u64 offset;
8702
8703                 offset = btrfs_file_extent_offset(leaf, fi);
8704                 if (dback->disk_bytenr + offset < entry->bytenr) {
8705                         fprintf(stderr, "Ref is before the entry start, please"
8706                                 " take a btrfs-image of this file system and "
8707                                 "send it to a btrfs developer, ref %Lu\n",
8708                                 dback->disk_bytenr);
8709                         ret = -EINVAL;
8710                         goto out;
8711                 }
8712
8713                 offset += dback->disk_bytenr;
8714                 offset -= entry->bytenr;
8715                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8716                 btrfs_set_file_extent_offset(leaf, fi, offset);
8717         }
8718
8719         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8720
8721         /*
8722          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8723          * only do this if we aren't using compression, otherwise it's a
8724          * trickier case.
8725          */
8726         if (!btrfs_file_extent_compression(leaf, fi))
8727                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8728         else
8729                 printf("ram bytes may be wrong?\n");
8730         btrfs_mark_buffer_dirty(leaf);
8731 out:
8732         err = btrfs_commit_transaction(trans, root);
8733         btrfs_release_path(path);
8734         return ret ? ret : err;
8735 }
8736
8737 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8738                            struct extent_record *rec)
8739 {
8740         struct extent_backref *back, *tmp;
8741         struct data_backref *dback;
8742         struct extent_entry *entry, *best = NULL;
8743         LIST_HEAD(entries);
8744         int nr_entries = 0;
8745         int broken_entries = 0;
8746         int ret = 0;
8747         short mismatch = 0;
8748
8749         /*
8750          * Metadata is easy and the backrefs should always agree on bytenr and
8751          * size, if not we've got bigger issues.
8752          */
8753         if (rec->metadata)
8754                 return 0;
8755
8756         rbtree_postorder_for_each_entry_safe(back, tmp,
8757                                              &rec->backref_tree, node) {
8758                 if (back->full_backref || !back->is_data)
8759                         continue;
8760
8761                 dback = to_data_backref(back);
8762
8763                 /*
8764                  * We only pay attention to backrefs that we found a real
8765                  * backref for.
8766                  */
8767                 if (dback->found_ref == 0)
8768                         continue;
8769
8770                 /*
8771                  * For now we only catch when the bytes don't match, not the
8772                  * bytenr.  We can easily do this at the same time, but I want
8773                  * to have a fs image to test on before we just add repair
8774                  * functionality willy-nilly so we know we won't screw up the
8775                  * repair.
8776                  */
8777
8778                 entry = find_entry(&entries, dback->disk_bytenr,
8779                                    dback->bytes);
8780                 if (!entry) {
8781                         entry = malloc(sizeof(struct extent_entry));
8782                         if (!entry) {
8783                                 ret = -ENOMEM;
8784                                 goto out;
8785                         }
8786                         memset(entry, 0, sizeof(*entry));
8787                         entry->bytenr = dback->disk_bytenr;
8788                         entry->bytes = dback->bytes;
8789                         list_add_tail(&entry->list, &entries);
8790                         nr_entries++;
8791                 }
8792
8793                 /*
8794                  * If we only have on entry we may think the entries agree when
8795                  * in reality they don't so we have to do some extra checking.
8796                  */
8797                 if (dback->disk_bytenr != rec->start ||
8798                     dback->bytes != rec->nr || back->broken)
8799                         mismatch = 1;
8800
8801                 if (back->broken) {
8802                         entry->broken++;
8803                         broken_entries++;
8804                 }
8805
8806                 entry->count++;
8807         }
8808
8809         /* Yay all the backrefs agree, carry on good sir */
8810         if (nr_entries <= 1 && !mismatch)
8811                 goto out;
8812
8813         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8814                 "%Lu\n", rec->start);
8815
8816         /*
8817          * First we want to see if the backrefs can agree amongst themselves who
8818          * is right, so figure out which one of the entries has the highest
8819          * count.
8820          */
8821         best = find_most_right_entry(&entries);
8822
8823         /*
8824          * Ok so we may have an even split between what the backrefs think, so
8825          * this is where we use the extent ref to see what it thinks.
8826          */
8827         if (!best) {
8828                 entry = find_entry(&entries, rec->start, rec->nr);
8829                 if (!entry && (!broken_entries || !rec->found_rec)) {
8830                         fprintf(stderr, "Backrefs don't agree with each other "
8831                                 "and extent record doesn't agree with anybody,"
8832                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8833                                 rec->start, rec->nr);
8834                         ret = -EINVAL;
8835                         goto out;
8836                 } else if (!entry) {
8837                         /*
8838                          * Ok our backrefs were broken, we'll assume this is the
8839                          * correct value and add an entry for this range.
8840                          */
8841                         entry = malloc(sizeof(struct extent_entry));
8842                         if (!entry) {
8843                                 ret = -ENOMEM;
8844                                 goto out;
8845                         }
8846                         memset(entry, 0, sizeof(*entry));
8847                         entry->bytenr = rec->start;
8848                         entry->bytes = rec->nr;
8849                         list_add_tail(&entry->list, &entries);
8850                         nr_entries++;
8851                 }
8852                 entry->count++;
8853                 best = find_most_right_entry(&entries);
8854                 if (!best) {
8855                         fprintf(stderr, "Backrefs and extent record evenly "
8856                                 "split on who is right, this is going to "
8857                                 "require user input to fix bytenr %Lu bytes "
8858                                 "%Lu\n", rec->start, rec->nr);
8859                         ret = -EINVAL;
8860                         goto out;
8861                 }
8862         }
8863
8864         /*
8865          * I don't think this can happen currently as we'll abort() if we catch
8866          * this case higher up, but in case somebody removes that we still can't
8867          * deal with it properly here yet, so just bail out of that's the case.
8868          */
8869         if (best->bytenr != rec->start) {
8870                 fprintf(stderr, "Extent start and backref starts don't match, "
8871                         "please use btrfs-image on this file system and send "
8872                         "it to a btrfs developer so they can make fsck fix "
8873                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8874                         rec->start, rec->nr);
8875                 ret = -EINVAL;
8876                 goto out;
8877         }
8878
8879         /*
8880          * Ok great we all agreed on an extent record, let's go find the real
8881          * references and fix up the ones that don't match.
8882          */
8883         rbtree_postorder_for_each_entry_safe(back, tmp,
8884                                              &rec->backref_tree, node) {
8885                 if (back->full_backref || !back->is_data)
8886                         continue;
8887
8888                 dback = to_data_backref(back);
8889
8890                 /*
8891                  * Still ignoring backrefs that don't have a real ref attached
8892                  * to them.
8893                  */
8894                 if (dback->found_ref == 0)
8895                         continue;
8896
8897                 if (dback->bytes == best->bytes &&
8898                     dback->disk_bytenr == best->bytenr)
8899                         continue;
8900
8901                 ret = repair_ref(info, path, dback, best);
8902                 if (ret)
8903                         goto out;
8904         }
8905
8906         /*
8907          * Ok we messed with the actual refs, which means we need to drop our
8908          * entire cache and go back and rescan.  I know this is a huge pain and
8909          * adds a lot of extra work, but it's the only way to be safe.  Once all
8910          * the backrefs agree we may not need to do anything to the extent
8911          * record itself.
8912          */
8913         ret = -EAGAIN;
8914 out:
8915         while (!list_empty(&entries)) {
8916                 entry = list_entry(entries.next, struct extent_entry, list);
8917                 list_del_init(&entry->list);
8918                 free(entry);
8919         }
8920         return ret;
8921 }
8922
8923 static int process_duplicates(struct cache_tree *extent_cache,
8924                               struct extent_record *rec)
8925 {
8926         struct extent_record *good, *tmp;
8927         struct cache_extent *cache;
8928         int ret;
8929
8930         /*
8931          * If we found a extent record for this extent then return, or if we
8932          * have more than one duplicate we are likely going to need to delete
8933          * something.
8934          */
8935         if (rec->found_rec || rec->num_duplicates > 1)
8936                 return 0;
8937
8938         /* Shouldn't happen but just in case */
8939         BUG_ON(!rec->num_duplicates);
8940
8941         /*
8942          * So this happens if we end up with a backref that doesn't match the
8943          * actual extent entry.  So either the backref is bad or the extent
8944          * entry is bad.  Either way we want to have the extent_record actually
8945          * reflect what we found in the extent_tree, so we need to take the
8946          * duplicate out and use that as the extent_record since the only way we
8947          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8948          */
8949         remove_cache_extent(extent_cache, &rec->cache);
8950
8951         good = to_extent_record(rec->dups.next);
8952         list_del_init(&good->list);
8953         INIT_LIST_HEAD(&good->backrefs);
8954         INIT_LIST_HEAD(&good->dups);
8955         good->cache.start = good->start;
8956         good->cache.size = good->nr;
8957         good->content_checked = 0;
8958         good->owner_ref_checked = 0;
8959         good->num_duplicates = 0;
8960         good->refs = rec->refs;
8961         list_splice_init(&rec->backrefs, &good->backrefs);
8962         while (1) {
8963                 cache = lookup_cache_extent(extent_cache, good->start,
8964                                             good->nr);
8965                 if (!cache)
8966                         break;
8967                 tmp = container_of(cache, struct extent_record, cache);
8968
8969                 /*
8970                  * If we find another overlapping extent and it's found_rec is
8971                  * set then it's a duplicate and we need to try and delete
8972                  * something.
8973                  */
8974                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8975                         if (list_empty(&good->list))
8976                                 list_add_tail(&good->list,
8977                                               &duplicate_extents);
8978                         good->num_duplicates += tmp->num_duplicates + 1;
8979                         list_splice_init(&tmp->dups, &good->dups);
8980                         list_del_init(&tmp->list);
8981                         list_add_tail(&tmp->list, &good->dups);
8982                         remove_cache_extent(extent_cache, &tmp->cache);
8983                         continue;
8984                 }
8985
8986                 /*
8987                  * Ok we have another non extent item backed extent rec, so lets
8988                  * just add it to this extent and carry on like we did above.
8989                  */
8990                 good->refs += tmp->refs;
8991                 list_splice_init(&tmp->backrefs, &good->backrefs);
8992                 remove_cache_extent(extent_cache, &tmp->cache);
8993                 free(tmp);
8994         }
8995         ret = insert_cache_extent(extent_cache, &good->cache);
8996         BUG_ON(ret);
8997         free(rec);
8998         return good->num_duplicates ? 0 : 1;
8999 }
9000
9001 static int delete_duplicate_records(struct btrfs_root *root,
9002                                     struct extent_record *rec)
9003 {
9004         struct btrfs_trans_handle *trans;
9005         LIST_HEAD(delete_list);
9006         struct btrfs_path path;
9007         struct extent_record *tmp, *good, *n;
9008         int nr_del = 0;
9009         int ret = 0, err;
9010         struct btrfs_key key;
9011
9012         btrfs_init_path(&path);
9013
9014         good = rec;
9015         /* Find the record that covers all of the duplicates. */
9016         list_for_each_entry(tmp, &rec->dups, list) {
9017                 if (good->start < tmp->start)
9018                         continue;
9019                 if (good->nr > tmp->nr)
9020                         continue;
9021
9022                 if (tmp->start + tmp->nr < good->start + good->nr) {
9023                         fprintf(stderr, "Ok we have overlapping extents that "
9024                                 "aren't completely covered by each other, this "
9025                                 "is going to require more careful thought.  "
9026                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9027                                 tmp->start, tmp->nr, good->start, good->nr);
9028                         abort();
9029                 }
9030                 good = tmp;
9031         }
9032
9033         if (good != rec)
9034                 list_add_tail(&rec->list, &delete_list);
9035
9036         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9037                 if (tmp == good)
9038                         continue;
9039                 list_move_tail(&tmp->list, &delete_list);
9040         }
9041
9042         root = root->fs_info->extent_root;
9043         trans = btrfs_start_transaction(root, 1);
9044         if (IS_ERR(trans)) {
9045                 ret = PTR_ERR(trans);
9046                 goto out;
9047         }
9048
9049         list_for_each_entry(tmp, &delete_list, list) {
9050                 if (tmp->found_rec == 0)
9051                         continue;
9052                 key.objectid = tmp->start;
9053                 key.type = BTRFS_EXTENT_ITEM_KEY;
9054                 key.offset = tmp->nr;
9055
9056                 /* Shouldn't happen but just in case */
9057                 if (tmp->metadata) {
9058                         fprintf(stderr, "Well this shouldn't happen, extent "
9059                                 "record overlaps but is metadata? "
9060                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9061                         abort();
9062                 }
9063
9064                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9065                 if (ret) {
9066                         if (ret > 0)
9067                                 ret = -EINVAL;
9068                         break;
9069                 }
9070                 ret = btrfs_del_item(trans, root, &path);
9071                 if (ret)
9072                         break;
9073                 btrfs_release_path(&path);
9074                 nr_del++;
9075         }
9076         err = btrfs_commit_transaction(trans, root);
9077         if (err && !ret)
9078                 ret = err;
9079 out:
9080         while (!list_empty(&delete_list)) {
9081                 tmp = to_extent_record(delete_list.next);
9082                 list_del_init(&tmp->list);
9083                 if (tmp == rec)
9084                         continue;
9085                 free(tmp);
9086         }
9087
9088         while (!list_empty(&rec->dups)) {
9089                 tmp = to_extent_record(rec->dups.next);
9090                 list_del_init(&tmp->list);
9091                 free(tmp);
9092         }
9093
9094         btrfs_release_path(&path);
9095
9096         if (!ret && !nr_del)
9097                 rec->num_duplicates = 0;
9098
9099         return ret ? ret : nr_del;
9100 }
9101
9102 static int find_possible_backrefs(struct btrfs_fs_info *info,
9103                                   struct btrfs_path *path,
9104                                   struct cache_tree *extent_cache,
9105                                   struct extent_record *rec)
9106 {
9107         struct btrfs_root *root;
9108         struct extent_backref *back, *tmp;
9109         struct data_backref *dback;
9110         struct cache_extent *cache;
9111         struct btrfs_file_extent_item *fi;
9112         struct btrfs_key key;
9113         u64 bytenr, bytes;
9114         int ret;
9115
9116         rbtree_postorder_for_each_entry_safe(back, tmp,
9117                                              &rec->backref_tree, node) {
9118                 /* Don't care about full backrefs (poor unloved backrefs) */
9119                 if (back->full_backref || !back->is_data)
9120                         continue;
9121
9122                 dback = to_data_backref(back);
9123
9124                 /* We found this one, we don't need to do a lookup */
9125                 if (dback->found_ref)
9126                         continue;
9127
9128                 key.objectid = dback->root;
9129                 key.type = BTRFS_ROOT_ITEM_KEY;
9130                 key.offset = (u64)-1;
9131
9132                 root = btrfs_read_fs_root(info, &key);
9133
9134                 /* No root, definitely a bad ref, skip */
9135                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9136                         continue;
9137                 /* Other err, exit */
9138                 if (IS_ERR(root))
9139                         return PTR_ERR(root);
9140
9141                 key.objectid = dback->owner;
9142                 key.type = BTRFS_EXTENT_DATA_KEY;
9143                 key.offset = dback->offset;
9144                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9145                 if (ret) {
9146                         btrfs_release_path(path);
9147                         if (ret < 0)
9148                                 return ret;
9149                         /* Didn't find it, we can carry on */
9150                         ret = 0;
9151                         continue;
9152                 }
9153
9154                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9155                                     struct btrfs_file_extent_item);
9156                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9157                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9158                 btrfs_release_path(path);
9159                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9160                 if (cache) {
9161                         struct extent_record *tmp;
9162                         tmp = container_of(cache, struct extent_record, cache);
9163
9164                         /*
9165                          * If we found an extent record for the bytenr for this
9166                          * particular backref then we can't add it to our
9167                          * current extent record.  We only want to add backrefs
9168                          * that don't have a corresponding extent item in the
9169                          * extent tree since they likely belong to this record
9170                          * and we need to fix it if it doesn't match bytenrs.
9171                          */
9172                         if  (tmp->found_rec)
9173                                 continue;
9174                 }
9175
9176                 dback->found_ref += 1;
9177                 dback->disk_bytenr = bytenr;
9178                 dback->bytes = bytes;
9179
9180                 /*
9181                  * Set this so the verify backref code knows not to trust the
9182                  * values in this backref.
9183                  */
9184                 back->broken = 1;
9185         }
9186
9187         return 0;
9188 }
9189
9190 /*
9191  * Record orphan data ref into corresponding root.
9192  *
9193  * Return 0 if the extent item contains data ref and recorded.
9194  * Return 1 if the extent item contains no useful data ref
9195  *   On that case, it may contains only shared_dataref or metadata backref
9196  *   or the file extent exists(this should be handled by the extent bytenr
9197  *   recovery routine)
9198  * Return <0 if something goes wrong.
9199  */
9200 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9201                                       struct extent_record *rec)
9202 {
9203         struct btrfs_key key;
9204         struct btrfs_root *dest_root;
9205         struct extent_backref *back, *tmp;
9206         struct data_backref *dback;
9207         struct orphan_data_extent *orphan;
9208         struct btrfs_path path;
9209         int recorded_data_ref = 0;
9210         int ret = 0;
9211
9212         if (rec->metadata)
9213                 return 1;
9214         btrfs_init_path(&path);
9215         rbtree_postorder_for_each_entry_safe(back, tmp,
9216                                              &rec->backref_tree, node) {
9217                 if (back->full_backref || !back->is_data ||
9218                     !back->found_extent_tree)
9219                         continue;
9220                 dback = to_data_backref(back);
9221                 if (dback->found_ref)
9222                         continue;
9223                 key.objectid = dback->root;
9224                 key.type = BTRFS_ROOT_ITEM_KEY;
9225                 key.offset = (u64)-1;
9226
9227                 dest_root = btrfs_read_fs_root(fs_info, &key);
9228
9229                 /* For non-exist root we just skip it */
9230                 if (IS_ERR(dest_root) || !dest_root)
9231                         continue;
9232
9233                 key.objectid = dback->owner;
9234                 key.type = BTRFS_EXTENT_DATA_KEY;
9235                 key.offset = dback->offset;
9236
9237                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9238                 btrfs_release_path(&path);
9239                 /*
9240                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9241                  * we need to record it for inode/file extent rebuild.
9242                  * For ret > 0, we record it only for file extent rebuild.
9243                  * For ret == 0, the file extent exists but only bytenr
9244                  * mismatch, let the original bytenr fix routine to handle,
9245                  * don't record it.
9246                  */
9247                 if (ret == 0)
9248                         continue;
9249                 ret = 0;
9250                 orphan = malloc(sizeof(*orphan));
9251                 if (!orphan) {
9252                         ret = -ENOMEM;
9253                         goto out;
9254                 }
9255                 INIT_LIST_HEAD(&orphan->list);
9256                 orphan->root = dback->root;
9257                 orphan->objectid = dback->owner;
9258                 orphan->offset = dback->offset;
9259                 orphan->disk_bytenr = rec->cache.start;
9260                 orphan->disk_len = rec->cache.size;
9261                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9262                 recorded_data_ref = 1;
9263         }
9264 out:
9265         btrfs_release_path(&path);
9266         if (!ret)
9267                 return !recorded_data_ref;
9268         else
9269                 return ret;
9270 }
9271
9272 /*
9273  * when an incorrect extent item is found, this will delete
9274  * all of the existing entries for it and recreate them
9275  * based on what the tree scan found.
9276  */
9277 static int fixup_extent_refs(struct btrfs_fs_info *info,
9278                              struct cache_tree *extent_cache,
9279                              struct extent_record *rec)
9280 {
9281         struct btrfs_trans_handle *trans = NULL;
9282         int ret;
9283         struct btrfs_path path;
9284         struct cache_extent *cache;
9285         struct extent_backref *back, *tmp;
9286         int allocated = 0;
9287         u64 flags = 0;
9288
9289         if (rec->flag_block_full_backref)
9290                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9291
9292         btrfs_init_path(&path);
9293         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9294                 /*
9295                  * Sometimes the backrefs themselves are so broken they don't
9296                  * get attached to any meaningful rec, so first go back and
9297                  * check any of our backrefs that we couldn't find and throw
9298                  * them into the list if we find the backref so that
9299                  * verify_backrefs can figure out what to do.
9300                  */
9301                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9302                 if (ret < 0)
9303                         goto out;
9304         }
9305
9306         /* step one, make sure all of the backrefs agree */
9307         ret = verify_backrefs(info, &path, rec);
9308         if (ret < 0)
9309                 goto out;
9310
9311         trans = btrfs_start_transaction(info->extent_root, 1);
9312         if (IS_ERR(trans)) {
9313                 ret = PTR_ERR(trans);
9314                 goto out;
9315         }
9316
9317         /* step two, delete all the existing records */
9318         ret = delete_extent_records(trans, info->extent_root, &path,
9319                                     rec->start);
9320
9321         if (ret < 0)
9322                 goto out;
9323
9324         /* was this block corrupt?  If so, don't add references to it */
9325         cache = lookup_cache_extent(info->corrupt_blocks,
9326                                     rec->start, rec->max_size);
9327         if (cache) {
9328                 ret = 0;
9329                 goto out;
9330         }
9331
9332         /* step three, recreate all the refs we did find */
9333         rbtree_postorder_for_each_entry_safe(back, tmp,
9334                                              &rec->backref_tree, node) {
9335                 /*
9336                  * if we didn't find any references, don't create a
9337                  * new extent record
9338                  */
9339                 if (!back->found_ref)
9340                         continue;
9341
9342                 rec->bad_full_backref = 0;
9343                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9344                 allocated = 1;
9345
9346                 if (ret)
9347                         goto out;
9348         }
9349 out:
9350         if (trans) {
9351                 int err = btrfs_commit_transaction(trans, info->extent_root);
9352                 if (!ret)
9353                         ret = err;
9354         }
9355
9356         if (!ret)
9357                 fprintf(stderr, "Repaired extent references for %llu\n",
9358                                 (unsigned long long)rec->start);
9359
9360         btrfs_release_path(&path);
9361         return ret;
9362 }
9363
9364 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9365                               struct extent_record *rec)
9366 {
9367         struct btrfs_trans_handle *trans;
9368         struct btrfs_root *root = fs_info->extent_root;
9369         struct btrfs_path path;
9370         struct btrfs_extent_item *ei;
9371         struct btrfs_key key;
9372         u64 flags;
9373         int ret = 0;
9374
9375         key.objectid = rec->start;
9376         if (rec->metadata) {
9377                 key.type = BTRFS_METADATA_ITEM_KEY;
9378                 key.offset = rec->info_level;
9379         } else {
9380                 key.type = BTRFS_EXTENT_ITEM_KEY;
9381                 key.offset = rec->max_size;
9382         }
9383
9384         trans = btrfs_start_transaction(root, 0);
9385         if (IS_ERR(trans))
9386                 return PTR_ERR(trans);
9387
9388         btrfs_init_path(&path);
9389         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9390         if (ret < 0) {
9391                 btrfs_release_path(&path);
9392                 btrfs_commit_transaction(trans, root);
9393                 return ret;
9394         } else if (ret) {
9395                 fprintf(stderr, "Didn't find extent for %llu\n",
9396                         (unsigned long long)rec->start);
9397                 btrfs_release_path(&path);
9398                 btrfs_commit_transaction(trans, root);
9399                 return -ENOENT;
9400         }
9401
9402         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9403                             struct btrfs_extent_item);
9404         flags = btrfs_extent_flags(path.nodes[0], ei);
9405         if (rec->flag_block_full_backref) {
9406                 fprintf(stderr, "setting full backref on %llu\n",
9407                         (unsigned long long)key.objectid);
9408                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9409         } else {
9410                 fprintf(stderr, "clearing full backref on %llu\n",
9411                         (unsigned long long)key.objectid);
9412                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9413         }
9414         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9415         btrfs_mark_buffer_dirty(path.nodes[0]);
9416         btrfs_release_path(&path);
9417         ret = btrfs_commit_transaction(trans, root);
9418         if (!ret)
9419                 fprintf(stderr, "Repaired extent flags for %llu\n",
9420                                 (unsigned long long)rec->start);
9421
9422         return ret;
9423 }
9424
9425 /* right now we only prune from the extent allocation tree */
9426 static int prune_one_block(struct btrfs_trans_handle *trans,
9427                            struct btrfs_fs_info *info,
9428                            struct btrfs_corrupt_block *corrupt)
9429 {
9430         int ret;
9431         struct btrfs_path path;
9432         struct extent_buffer *eb;
9433         u64 found;
9434         int slot;
9435         int nritems;
9436         int level = corrupt->level + 1;
9437
9438         btrfs_init_path(&path);
9439 again:
9440         /* we want to stop at the parent to our busted block */
9441         path.lowest_level = level;
9442
9443         ret = btrfs_search_slot(trans, info->extent_root,
9444                                 &corrupt->key, &path, -1, 1);
9445
9446         if (ret < 0)
9447                 goto out;
9448
9449         eb = path.nodes[level];
9450         if (!eb) {
9451                 ret = -ENOENT;
9452                 goto out;
9453         }
9454
9455         /*
9456          * hopefully the search gave us the block we want to prune,
9457          * lets try that first
9458          */
9459         slot = path.slots[level];
9460         found =  btrfs_node_blockptr(eb, slot);
9461         if (found == corrupt->cache.start)
9462                 goto del_ptr;
9463
9464         nritems = btrfs_header_nritems(eb);
9465
9466         /* the search failed, lets scan this node and hope we find it */
9467         for (slot = 0; slot < nritems; slot++) {
9468                 found =  btrfs_node_blockptr(eb, slot);
9469                 if (found == corrupt->cache.start)
9470                         goto del_ptr;
9471         }
9472         /*
9473          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9474          * to this block
9475          */
9476         if (eb == info->extent_root->node) {
9477                 ret = -ENOENT;
9478                 goto out;
9479         } else {
9480                 level++;
9481                 btrfs_release_path(&path);
9482                 goto again;
9483         }
9484
9485 del_ptr:
9486         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9487         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9488
9489 out:
9490         btrfs_release_path(&path);
9491         return ret;
9492 }
9493
9494 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9495 {
9496         struct btrfs_trans_handle *trans = NULL;
9497         struct cache_extent *cache;
9498         struct btrfs_corrupt_block *corrupt;
9499
9500         while (1) {
9501                 cache = search_cache_extent(info->corrupt_blocks, 0);
9502                 if (!cache)
9503                         break;
9504                 if (!trans) {
9505                         trans = btrfs_start_transaction(info->extent_root, 1);
9506                         if (IS_ERR(trans))
9507                                 return PTR_ERR(trans);
9508                 }
9509                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9510                 prune_one_block(trans, info, corrupt);
9511                 remove_cache_extent(info->corrupt_blocks, cache);
9512         }
9513         if (trans)
9514                 return btrfs_commit_transaction(trans, info->extent_root);
9515         return 0;
9516 }
9517
9518 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9519 {
9520         struct btrfs_block_group_cache *cache;
9521         u64 start, end;
9522         int ret;
9523
9524         while (1) {
9525                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9526                                             &start, &end, EXTENT_DIRTY);
9527                 if (ret)
9528                         break;
9529                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9530         }
9531
9532         start = 0;
9533         while (1) {
9534                 cache = btrfs_lookup_first_block_group(fs_info, start);
9535                 if (!cache)
9536                         break;
9537                 if (cache->cached)
9538                         cache->cached = 0;
9539                 start = cache->key.objectid + cache->key.offset;
9540         }
9541 }
9542
9543 static int check_extent_refs(struct btrfs_root *root,
9544                              struct cache_tree *extent_cache)
9545 {
9546         struct extent_record *rec;
9547         struct cache_extent *cache;
9548         int ret = 0;
9549         int had_dups = 0;
9550
9551         if (repair) {
9552                 /*
9553                  * if we're doing a repair, we have to make sure
9554                  * we don't allocate from the problem extents.
9555                  * In the worst case, this will be all the
9556                  * extents in the FS
9557                  */
9558                 cache = search_cache_extent(extent_cache, 0);
9559                 while(cache) {
9560                         rec = container_of(cache, struct extent_record, cache);
9561                         set_extent_dirty(root->fs_info->excluded_extents,
9562                                          rec->start,
9563                                          rec->start + rec->max_size - 1);
9564                         cache = next_cache_extent(cache);
9565                 }
9566
9567                 /* pin down all the corrupted blocks too */
9568                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9569                 while(cache) {
9570                         set_extent_dirty(root->fs_info->excluded_extents,
9571                                          cache->start,
9572                                          cache->start + cache->size - 1);
9573                         cache = next_cache_extent(cache);
9574                 }
9575                 prune_corrupt_blocks(root->fs_info);
9576                 reset_cached_block_groups(root->fs_info);
9577         }
9578
9579         reset_cached_block_groups(root->fs_info);
9580
9581         /*
9582          * We need to delete any duplicate entries we find first otherwise we
9583          * could mess up the extent tree when we have backrefs that actually
9584          * belong to a different extent item and not the weird duplicate one.
9585          */
9586         while (repair && !list_empty(&duplicate_extents)) {
9587                 rec = to_extent_record(duplicate_extents.next);
9588                 list_del_init(&rec->list);
9589
9590                 /* Sometimes we can find a backref before we find an actual
9591                  * extent, so we need to process it a little bit to see if there
9592                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9593                  * if this is a backref screwup.  If we need to delete stuff
9594                  * process_duplicates() will return 0, otherwise it will return
9595                  * 1 and we
9596                  */
9597                 if (process_duplicates(extent_cache, rec))
9598                         continue;
9599                 ret = delete_duplicate_records(root, rec);
9600                 if (ret < 0)
9601                         return ret;
9602                 /*
9603                  * delete_duplicate_records will return the number of entries
9604                  * deleted, so if it's greater than 0 then we know we actually
9605                  * did something and we need to remove.
9606                  */
9607                 if (ret)
9608                         had_dups = 1;
9609         }
9610
9611         if (had_dups)
9612                 return -EAGAIN;
9613
9614         while(1) {
9615                 int cur_err = 0;
9616                 int fix = 0;
9617
9618                 cache = search_cache_extent(extent_cache, 0);
9619                 if (!cache)
9620                         break;
9621                 rec = container_of(cache, struct extent_record, cache);
9622                 if (rec->num_duplicates) {
9623                         fprintf(stderr, "extent item %llu has multiple extent "
9624                                 "items\n", (unsigned long long)rec->start);
9625                         cur_err = 1;
9626                 }
9627
9628                 if (rec->refs != rec->extent_item_refs) {
9629                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9630                                 (unsigned long long)rec->start,
9631                                 (unsigned long long)rec->nr);
9632                         fprintf(stderr, "extent item %llu, found %llu\n",
9633                                 (unsigned long long)rec->extent_item_refs,
9634                                 (unsigned long long)rec->refs);
9635                         ret = record_orphan_data_extents(root->fs_info, rec);
9636                         if (ret < 0)
9637                                 goto repair_abort;
9638                         fix = ret;
9639                         cur_err = 1;
9640                 }
9641                 if (all_backpointers_checked(rec, 1)) {
9642                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9643                                 (unsigned long long)rec->start,
9644                                 (unsigned long long)rec->nr);
9645                         fix = 1;
9646                         cur_err = 1;
9647                 }
9648                 if (!rec->owner_ref_checked) {
9649                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9650                                 (unsigned long long)rec->start,
9651                                 (unsigned long long)rec->nr);
9652                         fix = 1;
9653                         cur_err = 1;
9654                 }
9655
9656                 if (repair && fix) {
9657                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9658                         if (ret)
9659                                 goto repair_abort;
9660                 }
9661
9662
9663                 if (rec->bad_full_backref) {
9664                         fprintf(stderr, "bad full backref, on [%llu]\n",
9665                                 (unsigned long long)rec->start);
9666                         if (repair) {
9667                                 ret = fixup_extent_flags(root->fs_info, rec);
9668                                 if (ret)
9669                                         goto repair_abort;
9670                                 fix = 1;
9671                         }
9672                         cur_err = 1;
9673                 }
9674                 /*
9675                  * Although it's not a extent ref's problem, we reuse this
9676                  * routine for error reporting.
9677                  * No repair function yet.
9678                  */
9679                 if (rec->crossing_stripes) {
9680                         fprintf(stderr,
9681                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9682                                 rec->start, rec->start + rec->max_size);
9683                         cur_err = 1;
9684                 }
9685
9686                 if (rec->wrong_chunk_type) {
9687                         fprintf(stderr,
9688                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9689                                 rec->start, rec->start + rec->max_size);
9690                         cur_err = 1;
9691                 }
9692
9693                 remove_cache_extent(extent_cache, cache);
9694                 free_all_extent_backrefs(rec);
9695                 if (!init_extent_tree && repair && (!cur_err || fix))
9696                         clear_extent_dirty(root->fs_info->excluded_extents,
9697                                            rec->start,
9698                                            rec->start + rec->max_size - 1);
9699                 free(rec);
9700         }
9701 repair_abort:
9702         if (repair) {
9703                 if (ret && ret != -EAGAIN) {
9704                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9705                         exit(1);
9706                 } else if (!ret) {
9707                         struct btrfs_trans_handle *trans;
9708
9709                         root = root->fs_info->extent_root;
9710                         trans = btrfs_start_transaction(root, 1);
9711                         if (IS_ERR(trans)) {
9712                                 ret = PTR_ERR(trans);
9713                                 goto repair_abort;
9714                         }
9715
9716                         ret = btrfs_fix_block_accounting(trans, root);
9717                         if (ret)
9718                                 goto repair_abort;
9719                         ret = btrfs_commit_transaction(trans, root);
9720                         if (ret)
9721                                 goto repair_abort;
9722                 }
9723                 return ret;
9724         }
9725         return 0;
9726 }
9727
9728 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9729 {
9730         u64 stripe_size;
9731
9732         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9733                 stripe_size = length;
9734                 stripe_size /= num_stripes;
9735         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9736                 stripe_size = length * 2;
9737                 stripe_size /= num_stripes;
9738         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9739                 stripe_size = length;
9740                 stripe_size /= (num_stripes - 1);
9741         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9742                 stripe_size = length;
9743                 stripe_size /= (num_stripes - 2);
9744         } else {
9745                 stripe_size = length;
9746         }
9747         return stripe_size;
9748 }
9749
/*
 * Check the chunk against its block group and device extent refs:
 * Return 0 if all refs seem valid.
 * Return 1 if only part of the refs seem valid and a later check is needed
 * to rebuild the missing ones (e.g. a missing block group, which requires
 * searching the extent tree to rebuild it).
 * Return -1 if essential refs are missing and cannot be rebuilt.
 */
9757 static int check_chunk_refs(struct chunk_record *chunk_rec,
9758                             struct block_group_tree *block_group_cache,
9759                             struct device_extent_tree *dev_extent_cache,
9760                             int silent)
9761 {
9762         struct cache_extent *block_group_item;
9763         struct block_group_record *block_group_rec;
9764         struct cache_extent *dev_extent_item;
9765         struct device_extent_record *dev_extent_rec;
9766         u64 devid;
9767         u64 offset;
9768         u64 length;
9769         int metadump_v2 = 0;
9770         int i;
9771         int ret = 0;
9772
9773         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9774                                                chunk_rec->offset,
9775                                                chunk_rec->length);
9776         if (block_group_item) {
9777                 block_group_rec = container_of(block_group_item,
9778                                                struct block_group_record,
9779                                                cache);
9780                 if (chunk_rec->length != block_group_rec->offset ||
9781                     chunk_rec->offset != block_group_rec->objectid ||
9782                     (!metadump_v2 &&
9783                      chunk_rec->type_flags != block_group_rec->flags)) {
9784                         if (!silent)
9785                                 fprintf(stderr,
9786                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9787                                         chunk_rec->objectid,
9788                                         chunk_rec->type,
9789                                         chunk_rec->offset,
9790                                         chunk_rec->length,
9791                                         chunk_rec->offset,
9792                                         chunk_rec->type_flags,
9793                                         block_group_rec->objectid,
9794                                         block_group_rec->type,
9795                                         block_group_rec->offset,
9796                                         block_group_rec->offset,
9797                                         block_group_rec->objectid,
9798                                         block_group_rec->flags);
9799                         ret = -1;
9800                 } else {
9801                         list_del_init(&block_group_rec->list);
9802                         chunk_rec->bg_rec = block_group_rec;
9803                 }
9804         } else {
9805                 if (!silent)
9806                         fprintf(stderr,
9807                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9808                                 chunk_rec->objectid,
9809                                 chunk_rec->type,
9810                                 chunk_rec->offset,
9811                                 chunk_rec->length,
9812                                 chunk_rec->offset,
9813                                 chunk_rec->type_flags);
9814                 ret = 1;
9815         }
9816
9817         if (metadump_v2)
9818                 return ret;
9819
9820         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9821                                     chunk_rec->num_stripes);
9822         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9823                 devid = chunk_rec->stripes[i].devid;
9824                 offset = chunk_rec->stripes[i].offset;
9825                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9826                                                        devid, offset, length);
9827                 if (dev_extent_item) {
9828                         dev_extent_rec = container_of(dev_extent_item,
9829                                                 struct device_extent_record,
9830                                                 cache);
9831                         if (dev_extent_rec->objectid != devid ||
9832                             dev_extent_rec->offset != offset ||
9833                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9834                             dev_extent_rec->length != length) {
9835                                 if (!silent)
9836                                         fprintf(stderr,
9837                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9838                                                 chunk_rec->objectid,
9839                                                 chunk_rec->type,
9840                                                 chunk_rec->offset,
9841                                                 chunk_rec->stripes[i].devid,
9842                                                 chunk_rec->stripes[i].offset,
9843                                                 dev_extent_rec->objectid,
9844                                                 dev_extent_rec->offset,
9845                                                 dev_extent_rec->length);
9846                                 ret = -1;
9847                         } else {
9848                                 list_move(&dev_extent_rec->chunk_list,
9849                                           &chunk_rec->dextents);
9850                         }
9851                 } else {
9852                         if (!silent)
9853                                 fprintf(stderr,
9854                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9855                                         chunk_rec->objectid,
9856                                         chunk_rec->type,
9857                                         chunk_rec->offset,
9858                                         chunk_rec->stripes[i].devid,
9859                                         chunk_rec->stripes[i].offset);
9860                         ret = -1;
9861                 }
9862         }
9863         return ret;
9864 }
9865
9866 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9867 int check_chunks(struct cache_tree *chunk_cache,
9868                  struct block_group_tree *block_group_cache,
9869                  struct device_extent_tree *dev_extent_cache,
9870                  struct list_head *good, struct list_head *bad,
9871                  struct list_head *rebuild, int silent)
9872 {
9873         struct cache_extent *chunk_item;
9874         struct chunk_record *chunk_rec;
9875         struct block_group_record *bg_rec;
9876         struct device_extent_record *dext_rec;
9877         int err;
9878         int ret = 0;
9879
9880         chunk_item = first_cache_extent(chunk_cache);
9881         while (chunk_item) {
9882                 chunk_rec = container_of(chunk_item, struct chunk_record,
9883                                          cache);
9884                 err = check_chunk_refs(chunk_rec, block_group_cache,
9885                                        dev_extent_cache, silent);
9886                 if (err < 0)
9887                         ret = err;
9888                 if (err == 0 && good)
9889                         list_add_tail(&chunk_rec->list, good);
9890                 if (err > 0 && rebuild)
9891                         list_add_tail(&chunk_rec->list, rebuild);
9892                 if (err < 0 && bad)
9893                         list_add_tail(&chunk_rec->list, bad);
9894                 chunk_item = next_cache_extent(chunk_item);
9895         }
9896
9897         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9898                 if (!silent)
9899                         fprintf(stderr,
9900                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9901                                 bg_rec->objectid,
9902                                 bg_rec->offset,
9903                                 bg_rec->flags);
9904                 if (!ret)
9905                         ret = 1;
9906         }
9907
9908         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9909                             chunk_list) {
9910                 if (!silent)
9911                         fprintf(stderr,
9912                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9913                                 dext_rec->objectid,
9914                                 dext_rec->offset,
9915                                 dext_rec->length);
9916                 if (!ret)
9917                         ret = 1;
9918         }
9919         return ret;
9920 }
9921
9922
9923 static int check_device_used(struct device_record *dev_rec,
9924                              struct device_extent_tree *dext_cache)
9925 {
9926         struct cache_extent *cache;
9927         struct device_extent_record *dev_extent_rec;
9928         u64 total_byte = 0;
9929
9930         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9931         while (cache) {
9932                 dev_extent_rec = container_of(cache,
9933                                               struct device_extent_record,
9934                                               cache);
9935                 if (dev_extent_rec->objectid != dev_rec->devid)
9936                         break;
9937
9938                 list_del_init(&dev_extent_rec->device_list);
9939                 total_byte += dev_extent_rec->length;
9940                 cache = next_cache_extent(cache);
9941         }
9942
9943         if (total_byte != dev_rec->byte_used) {
9944                 fprintf(stderr,
9945                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9946                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9947                         dev_rec->type, dev_rec->offset);
9948                 return -1;
9949         } else {
9950                 return 0;
9951         }
9952 }
9953
9954 /* check btrfs_dev_item -> btrfs_dev_extent */
9955 static int check_devices(struct rb_root *dev_cache,
9956                          struct device_extent_tree *dev_extent_cache)
9957 {
9958         struct rb_node *dev_node;
9959         struct device_record *dev_rec;
9960         struct device_extent_record *dext_rec;
9961         int err;
9962         int ret = 0;
9963
9964         dev_node = rb_first(dev_cache);
9965         while (dev_node) {
9966                 dev_rec = container_of(dev_node, struct device_record, node);
9967                 err = check_device_used(dev_rec, dev_extent_cache);
9968                 if (err)
9969                         ret = err;
9970
9971                 dev_node = rb_next(dev_node);
9972         }
9973         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9974                             device_list) {
9975                 fprintf(stderr,
9976                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9977                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9978                 if (!ret)
9979                         ret = 1;
9980         }
9981         return ret;
9982 }
9983
9984 static int add_root_item_to_list(struct list_head *head,
9985                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9986                                   u8 level, u8 drop_level,
9987                                   struct btrfs_key *drop_key)
9988 {
9989
9990         struct root_item_record *ri_rec;
9991         ri_rec = malloc(sizeof(*ri_rec));
9992         if (!ri_rec)
9993                 return -ENOMEM;
9994         ri_rec->bytenr = bytenr;
9995         ri_rec->objectid = objectid;
9996         ri_rec->level = level;
9997         ri_rec->drop_level = drop_level;
9998         ri_rec->last_snapshot = last_snapshot;
9999         if (drop_key)
10000                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10001         list_add_tail(&ri_rec->list, head);
10002
10003         return 0;
10004 }
10005
10006 static void free_root_item_list(struct list_head *list)
10007 {
10008         struct root_item_record *ri_rec;
10009
10010         while (!list_empty(list)) {
10011                 ri_rec = list_first_entry(list, struct root_item_record,
10012                                           list);
10013                 list_del_init(&ri_rec->list);
10014                 free(ri_rec);
10015         }
10016 }
10017
10018 static int deal_root_from_list(struct list_head *list,
10019                                struct btrfs_root *root,
10020                                struct block_info *bits,
10021                                int bits_nr,
10022                                struct cache_tree *pending,
10023                                struct cache_tree *seen,
10024                                struct cache_tree *reada,
10025                                struct cache_tree *nodes,
10026                                struct cache_tree *extent_cache,
10027                                struct cache_tree *chunk_cache,
10028                                struct rb_root *dev_cache,
10029                                struct block_group_tree *block_group_cache,
10030                                struct device_extent_tree *dev_extent_cache)
10031 {
10032         int ret = 0;
10033         u64 last;
10034
10035         while (!list_empty(list)) {
10036                 struct root_item_record *rec;
10037                 struct extent_buffer *buf;
10038                 rec = list_entry(list->next,
10039                                  struct root_item_record, list);
10040                 last = 0;
10041                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10042                 if (!extent_buffer_uptodate(buf)) {
10043                         free_extent_buffer(buf);
10044                         ret = -EIO;
10045                         break;
10046                 }
10047                 ret = add_root_to_pending(buf, extent_cache, pending,
10048                                     seen, nodes, rec->objectid);
10049                 if (ret < 0)
10050                         break;
10051                 /*
10052                  * To rebuild extent tree, we need deal with snapshot
10053                  * one by one, otherwise we deal with node firstly which
10054                  * can maximize readahead.
10055                  */
10056                 while (1) {
10057                         ret = run_next_block(root, bits, bits_nr, &last,
10058                                              pending, seen, reada, nodes,
10059                                              extent_cache, chunk_cache,
10060                                              dev_cache, block_group_cache,
10061                                              dev_extent_cache, rec);
10062                         if (ret != 0)
10063                                 break;
10064                 }
10065                 free_extent_buffer(buf);
10066                 list_del(&rec->list);
10067                 free(rec);
10068                 if (ret < 0)
10069                         break;
10070         }
10071         while (ret >= 0) {
10072                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10073                                      reada, nodes, extent_cache, chunk_cache,
10074                                      dev_cache, block_group_cache,
10075                                      dev_extent_cache, NULL);
10076                 if (ret != 0) {
10077                         if (ret > 0)
10078                                 ret = 0;
10079                         break;
10080                 }
10081         }
10082         return ret;
10083 }
10084
10085 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10086 {
10087         struct rb_root dev_cache;
10088         struct cache_tree chunk_cache;
10089         struct block_group_tree block_group_cache;
10090         struct device_extent_tree dev_extent_cache;
10091         struct cache_tree extent_cache;
10092         struct cache_tree seen;
10093         struct cache_tree pending;
10094         struct cache_tree reada;
10095         struct cache_tree nodes;
10096         struct extent_io_tree excluded_extents;
10097         struct cache_tree corrupt_blocks;
10098         struct btrfs_path path;
10099         struct btrfs_key key;
10100         struct btrfs_key found_key;
10101         int ret, err = 0;
10102         struct block_info *bits;
10103         int bits_nr;
10104         struct extent_buffer *leaf;
10105         int slot;
10106         struct btrfs_root_item ri;
10107         struct list_head dropping_trees;
10108         struct list_head normal_trees;
10109         struct btrfs_root *root1;
10110         struct btrfs_root *root;
10111         u64 objectid;
10112         u8 level;
10113
10114         root = fs_info->fs_root;
10115         dev_cache = RB_ROOT;
10116         cache_tree_init(&chunk_cache);
10117         block_group_tree_init(&block_group_cache);
10118         device_extent_tree_init(&dev_extent_cache);
10119
10120         cache_tree_init(&extent_cache);
10121         cache_tree_init(&seen);
10122         cache_tree_init(&pending);
10123         cache_tree_init(&nodes);
10124         cache_tree_init(&reada);
10125         cache_tree_init(&corrupt_blocks);
10126         extent_io_tree_init(&excluded_extents);
10127         INIT_LIST_HEAD(&dropping_trees);
10128         INIT_LIST_HEAD(&normal_trees);
10129
10130         if (repair) {
10131                 fs_info->excluded_extents = &excluded_extents;
10132                 fs_info->fsck_extent_cache = &extent_cache;
10133                 fs_info->free_extent_hook = free_extent_hook;
10134                 fs_info->corrupt_blocks = &corrupt_blocks;
10135         }
10136
10137         bits_nr = 1024;
10138         bits = malloc(bits_nr * sizeof(struct block_info));
10139         if (!bits) {
10140                 perror("malloc");
10141                 exit(1);
10142         }
10143
10144         if (ctx.progress_enabled) {
10145                 ctx.tp = TASK_EXTENTS;
10146                 task_start(ctx.info);
10147         }
10148
10149 again:
10150         root1 = fs_info->tree_root;
10151         level = btrfs_header_level(root1->node);
10152         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10153                                     root1->node->start, 0, level, 0, NULL);
10154         if (ret < 0)
10155                 goto out;
10156         root1 = fs_info->chunk_root;
10157         level = btrfs_header_level(root1->node);
10158         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10159                                     root1->node->start, 0, level, 0, NULL);
10160         if (ret < 0)
10161                 goto out;
10162         btrfs_init_path(&path);
10163         key.offset = 0;
10164         key.objectid = 0;
10165         key.type = BTRFS_ROOT_ITEM_KEY;
10166         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10167         if (ret < 0)
10168                 goto out;
10169         while(1) {
10170                 leaf = path.nodes[0];
10171                 slot = path.slots[0];
10172                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10173                         ret = btrfs_next_leaf(root, &path);
10174                         if (ret != 0)
10175                                 break;
10176                         leaf = path.nodes[0];
10177                         slot = path.slots[0];
10178                 }
10179                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10180                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10181                         unsigned long offset;
10182                         u64 last_snapshot;
10183
10184                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10185                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10186                         last_snapshot = btrfs_root_last_snapshot(&ri);
10187                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10188                                 level = btrfs_root_level(&ri);
10189                                 ret = add_root_item_to_list(&normal_trees,
10190                                                 found_key.objectid,
10191                                                 btrfs_root_bytenr(&ri),
10192                                                 last_snapshot, level,
10193                                                 0, NULL);
10194                                 if (ret < 0)
10195                                         goto out;
10196                         } else {
10197                                 level = btrfs_root_level(&ri);
10198                                 objectid = found_key.objectid;
10199                                 btrfs_disk_key_to_cpu(&found_key,
10200                                                       &ri.drop_progress);
10201                                 ret = add_root_item_to_list(&dropping_trees,
10202                                                 objectid,
10203                                                 btrfs_root_bytenr(&ri),
10204                                                 last_snapshot, level,
10205                                                 ri.drop_level, &found_key);
10206                                 if (ret < 0)
10207                                         goto out;
10208                         }
10209                 }
10210                 path.slots[0]++;
10211         }
10212         btrfs_release_path(&path);
10213
10214         /*
10215          * check_block can return -EAGAIN if it fixes something, please keep
10216          * this in mind when dealing with return values from these functions, if
10217          * we get -EAGAIN we want to fall through and restart the loop.
10218          */
10219         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10220                                   &seen, &reada, &nodes, &extent_cache,
10221                                   &chunk_cache, &dev_cache, &block_group_cache,
10222                                   &dev_extent_cache);
10223         if (ret < 0) {
10224                 if (ret == -EAGAIN)
10225                         goto loop;
10226                 goto out;
10227         }
10228         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10229                                   &pending, &seen, &reada, &nodes,
10230                                   &extent_cache, &chunk_cache, &dev_cache,
10231                                   &block_group_cache, &dev_extent_cache);
10232         if (ret < 0) {
10233                 if (ret == -EAGAIN)
10234                         goto loop;
10235                 goto out;
10236         }
10237
10238         ret = check_chunks(&chunk_cache, &block_group_cache,
10239                            &dev_extent_cache, NULL, NULL, NULL, 0);
10240         if (ret) {
10241                 if (ret == -EAGAIN)
10242                         goto loop;
10243                 err = ret;
10244         }
10245
10246         ret = check_extent_refs(root, &extent_cache);
10247         if (ret < 0) {
10248                 if (ret == -EAGAIN)
10249                         goto loop;
10250                 goto out;
10251         }
10252
10253         ret = check_devices(&dev_cache, &dev_extent_cache);
10254         if (ret && err)
10255                 ret = err;
10256
10257 out:
10258         task_stop(ctx.info);
10259         if (repair) {
10260                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10261                 extent_io_tree_cleanup(&excluded_extents);
10262                 fs_info->fsck_extent_cache = NULL;
10263                 fs_info->free_extent_hook = NULL;
10264                 fs_info->corrupt_blocks = NULL;
10265                 fs_info->excluded_extents = NULL;
10266         }
10267         free(bits);
10268         free_chunk_cache_tree(&chunk_cache);
10269         free_device_cache_tree(&dev_cache);
10270         free_block_group_tree(&block_group_cache);
10271         free_device_extent_tree(&dev_extent_cache);
10272         free_extent_cache_tree(&seen);
10273         free_extent_cache_tree(&pending);
10274         free_extent_cache_tree(&reada);
10275         free_extent_cache_tree(&nodes);
10276         free_root_item_list(&normal_trees);
10277         free_root_item_list(&dropping_trees);
10278         return ret;
10279 loop:
10280         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10281         free_extent_cache_tree(&seen);
10282         free_extent_cache_tree(&pending);
10283         free_extent_cache_tree(&reada);
10284         free_extent_cache_tree(&nodes);
10285         free_chunk_cache_tree(&chunk_cache);
10286         free_block_group_tree(&block_group_cache);
10287         free_device_cache_tree(&dev_cache);
10288         free_device_extent_tree(&dev_extent_cache);
10289         free_extent_record_cache(&extent_cache);
10290         free_root_item_list(&normal_trees);
10291         free_root_item_list(&dropping_trees);
10292         extent_io_tree_cleanup(&excluded_extents);
10293         goto again;
10294 }
10295
10296 /*
10297  * Check backrefs of a tree block given by @bytenr or @eb.
10298  *
10299  * @root:       the root containing the @bytenr or @eb
10300  * @eb:         tree block extent buffer, can be NULL
10301  * @bytenr:     bytenr of the tree block to search
10302  * @level:      tree level of the tree block
10303  * @owner:      owner of the tree block
10304  *
10305  * Return >0 for any error found and output error message
10306  * Return 0 for no error found
10307  */
10308 static int check_tree_block_ref(struct btrfs_root *root,
10309                                 struct extent_buffer *eb, u64 bytenr,
10310                                 int level, u64 owner)
10311 {
10312         struct btrfs_key key;
10313         struct btrfs_root *extent_root = root->fs_info->extent_root;
10314         struct btrfs_path path;
10315         struct btrfs_extent_item *ei;
10316         struct btrfs_extent_inline_ref *iref;
10317         struct extent_buffer *leaf;
10318         unsigned long end;
10319         unsigned long ptr;
10320         int slot;
10321         int skinny_level;
10322         int type;
10323         u32 nodesize = root->fs_info->nodesize;
10324         u32 item_size;
10325         u64 offset;
10326         int tree_reloc_root = 0;
10327         int found_ref = 0;
10328         int err = 0;
10329         int ret;
10330
10331         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10332             btrfs_header_bytenr(root->node) == bytenr)
10333                 tree_reloc_root = 1;
10334
10335         btrfs_init_path(&path);
10336         key.objectid = bytenr;
10337         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10338                 key.type = BTRFS_METADATA_ITEM_KEY;
10339         else
10340                 key.type = BTRFS_EXTENT_ITEM_KEY;
10341         key.offset = (u64)-1;
10342
10343         /* Search for the backref in extent tree */
10344         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10345         if (ret < 0) {
10346                 err |= BACKREF_MISSING;
10347                 goto out;
10348         }
10349         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10350         if (ret) {
10351                 err |= BACKREF_MISSING;
10352                 goto out;
10353         }
10354
10355         leaf = path.nodes[0];
10356         slot = path.slots[0];
10357         btrfs_item_key_to_cpu(leaf, &key, slot);
10358
10359         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10360
10361         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10362                 skinny_level = (int)key.offset;
10363                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10364         } else {
10365                 struct btrfs_tree_block_info *info;
10366
10367                 info = (struct btrfs_tree_block_info *)(ei + 1);
10368                 skinny_level = btrfs_tree_block_level(leaf, info);
10369                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10370         }
10371
10372         if (eb) {
10373                 u64 header_gen;
10374                 u64 extent_gen;
10375
10376                 if (!(btrfs_extent_flags(leaf, ei) &
10377                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10378                         error(
10379                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10380                                 key.objectid, nodesize,
10381                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10382                         err = BACKREF_MISMATCH;
10383                 }
10384                 header_gen = btrfs_header_generation(eb);
10385                 extent_gen = btrfs_extent_generation(leaf, ei);
10386                 if (header_gen != extent_gen) {
10387                         error(
10388         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10389                                 key.objectid, nodesize, header_gen,
10390                                 extent_gen);
10391                         err = BACKREF_MISMATCH;
10392                 }
10393                 if (level != skinny_level) {
10394                         error(
10395                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10396                                 key.objectid, nodesize, level, skinny_level);
10397                         err = BACKREF_MISMATCH;
10398                 }
10399                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10400                         error(
10401                         "extent[%llu %u] is referred by other roots than %llu",
10402                                 key.objectid, nodesize, root->objectid);
10403                         err = BACKREF_MISMATCH;
10404                 }
10405         }
10406
10407         /*
10408          * Iterate the extent/metadata item to find the exact backref
10409          */
10410         item_size = btrfs_item_size_nr(leaf, slot);
10411         ptr = (unsigned long)iref;
10412         end = (unsigned long)ei + item_size;
10413         while (ptr < end) {
10414                 iref = (struct btrfs_extent_inline_ref *)ptr;
10415                 type = btrfs_extent_inline_ref_type(leaf, iref);
10416                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10417
10418                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10419                         (offset == root->objectid || offset == owner)) {
10420                         found_ref = 1;
10421                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10422                         /*
10423                          * Backref of tree reloc root points to itself, no need
10424                          * to check backref any more.
10425                          */
10426                         if (tree_reloc_root)
10427                                 found_ref = 1;
10428                         else
10429                         /* Check if the backref points to valid referencer */
10430                                 found_ref = !check_tree_block_ref(root, NULL,
10431                                                 offset, level + 1, owner);
10432                 }
10433
10434                 if (found_ref)
10435                         break;
10436                 ptr += btrfs_extent_inline_ref_size(type);
10437         }
10438
10439         /*
10440          * Inlined extent item doesn't have what we need, check
10441          * TREE_BLOCK_REF_KEY
10442          */
10443         if (!found_ref) {
10444                 btrfs_release_path(&path);
10445                 key.objectid = bytenr;
10446                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10447                 key.offset = root->objectid;
10448
10449                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10450                 if (!ret)
10451                         found_ref = 1;
10452         }
10453         if (!found_ref)
10454                 err |= BACKREF_MISSING;
10455 out:
10456         btrfs_release_path(&path);
10457         if (eb && (err & BACKREF_MISSING))
10458                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10459                         bytenr, nodesize, owner, level);
10460         return err;
10461 }
10462
10463 /*
10464  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10465  *
10466  * Return >0 any error found and output error message
10467  * Return 0 for no error found
10468  */
10469 static int check_extent_data_item(struct btrfs_root *root,
10470                                   struct extent_buffer *eb, int slot)
10471 {
10472         struct btrfs_file_extent_item *fi;
10473         struct btrfs_path path;
10474         struct btrfs_root *extent_root = root->fs_info->extent_root;
10475         struct btrfs_key fi_key;
10476         struct btrfs_key dbref_key;
10477         struct extent_buffer *leaf;
10478         struct btrfs_extent_item *ei;
10479         struct btrfs_extent_inline_ref *iref;
10480         struct btrfs_extent_data_ref *dref;
10481         u64 owner;
10482         u64 disk_bytenr;
10483         u64 disk_num_bytes;
10484         u64 extent_num_bytes;
10485         u64 extent_flags;
10486         u32 item_size;
10487         unsigned long end;
10488         unsigned long ptr;
10489         int type;
10490         u64 ref_root;
10491         int found_dbackref = 0;
10492         int err = 0;
10493         int ret;
10494
10495         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10496         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10497
10498         /* Nothing to check for hole and inline data extents */
10499         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10500             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10501                 return 0;
10502
10503         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10504         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10505         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10506
10507         /* Check unaligned disk_num_bytes and num_bytes */
10508         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10509                 error(
10510 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10511                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10512                         root->fs_info->sectorsize);
10513                 err |= BYTES_UNALIGNED;
10514         } else {
10515                 data_bytes_allocated += disk_num_bytes;
10516         }
10517         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10518                 error(
10519 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10520                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10521                         root->fs_info->sectorsize);
10522                 err |= BYTES_UNALIGNED;
10523         } else {
10524                 data_bytes_referenced += extent_num_bytes;
10525         }
10526         owner = btrfs_header_owner(eb);
10527
10528         /* Check the extent item of the file extent in extent tree */
10529         btrfs_init_path(&path);
10530         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10531         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10532         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10533
10534         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10535         if (ret)
10536                 goto out;
10537
10538         leaf = path.nodes[0];
10539         slot = path.slots[0];
10540         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10541
10542         extent_flags = btrfs_extent_flags(leaf, ei);
10543
10544         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10545                 error(
10546                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10547                     disk_bytenr, disk_num_bytes,
10548                     BTRFS_EXTENT_FLAG_DATA);
10549                 err |= BACKREF_MISMATCH;
10550         }
10551
10552         /* Check data backref inside that extent item */
10553         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10554         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10555         ptr = (unsigned long)iref;
10556         end = (unsigned long)ei + item_size;
10557         while (ptr < end) {
10558                 iref = (struct btrfs_extent_inline_ref *)ptr;
10559                 type = btrfs_extent_inline_ref_type(leaf, iref);
10560                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10561
10562                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10563                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10564                         if (ref_root == owner || ref_root == root->objectid)
10565                                 found_dbackref = 1;
10566                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10567                         found_dbackref = !check_tree_block_ref(root, NULL,
10568                                 btrfs_extent_inline_ref_offset(leaf, iref),
10569                                 0, owner);
10570                 }
10571
10572                 if (found_dbackref)
10573                         break;
10574                 ptr += btrfs_extent_inline_ref_size(type);
10575         }
10576
10577         if (!found_dbackref) {
10578                 btrfs_release_path(&path);
10579
10580                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10581                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10582                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10583                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10584                                 fi_key.objectid, fi_key.offset);
10585
10586                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10587                                         &dbref_key, &path, 0, 0);
10588                 if (!ret) {
10589                         found_dbackref = 1;
10590                         goto out;
10591                 }
10592
10593                 btrfs_release_path(&path);
10594
10595                 /*
10596                  * Neither inlined nor EXTENT_DATA_REF found, try
10597                  * SHARED_DATA_REF as last chance.
10598                  */
10599                 dbref_key.objectid = disk_bytenr;
10600                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10601                 dbref_key.offset = eb->start;
10602
10603                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10604                                         &dbref_key, &path, 0, 0);
10605                 if (!ret) {
10606                         found_dbackref = 1;
10607                         goto out;
10608                 }
10609         }
10610
10611 out:
10612         if (!found_dbackref)
10613                 err |= BACKREF_MISSING;
10614         btrfs_release_path(&path);
10615         if (err & BACKREF_MISSING) {
10616                 error("data extent[%llu %llu] backref lost",
10617                       disk_bytenr, disk_num_bytes);
10618         }
10619         return err;
10620 }
10621
10622 /*
10623  * Get real tree block level for the case like shared block
10624  * Return >= 0 as tree level
10625  * Return <0 for error
10626  */
10627 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10628 {
10629         struct extent_buffer *eb;
10630         struct btrfs_path path;
10631         struct btrfs_key key;
10632         struct btrfs_extent_item *ei;
10633         u64 flags;
10634         u64 transid;
10635         u8 backref_level;
10636         u8 header_level;
10637         int ret;
10638
10639         /* Search extent tree for extent generation and level */
10640         key.objectid = bytenr;
10641         key.type = BTRFS_METADATA_ITEM_KEY;
10642         key.offset = (u64)-1;
10643
10644         btrfs_init_path(&path);
10645         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10646         if (ret < 0)
10647                 goto release_out;
10648         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10649         if (ret < 0)
10650                 goto release_out;
10651         if (ret > 0) {
10652                 ret = -ENOENT;
10653                 goto release_out;
10654         }
10655
10656         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10657         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10658                             struct btrfs_extent_item);
10659         flags = btrfs_extent_flags(path.nodes[0], ei);
10660         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10661                 ret = -ENOENT;
10662                 goto release_out;
10663         }
10664
10665         /* Get transid for later read_tree_block() check */
10666         transid = btrfs_extent_generation(path.nodes[0], ei);
10667
10668         /* Get backref level as one source */
10669         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10670                 backref_level = key.offset;
10671         } else {
10672                 struct btrfs_tree_block_info *info;
10673
10674                 info = (struct btrfs_tree_block_info *)(ei + 1);
10675                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10676         }
10677         btrfs_release_path(&path);
10678
10679         /* Get level from tree block as an alternative source */
10680         eb = read_tree_block(fs_info, bytenr, transid);
10681         if (!extent_buffer_uptodate(eb)) {
10682                 free_extent_buffer(eb);
10683                 return -EIO;
10684         }
10685         header_level = btrfs_header_level(eb);
10686         free_extent_buffer(eb);
10687
10688         if (header_level != backref_level)
10689                 return -EIO;
10690         return header_level;
10691
10692 release_out:
10693         btrfs_release_path(&path);
10694         return ret;
10695 }
10696
10697 /*
10698  * Check if a tree block backref is valid (points to a valid tree block)
10699  * if level == -1, level will be resolved
10700  * Return >0 for any error found and print error message
10701  */
10702 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10703                                     u64 bytenr, int level)
10704 {
10705         struct btrfs_root *root;
10706         struct btrfs_key key;
10707         struct btrfs_path path;
10708         struct extent_buffer *eb;
10709         struct extent_buffer *node;
10710         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10711         int err = 0;
10712         int ret;
10713
10714         /* Query level for level == -1 special case */
10715         if (level == -1)
10716                 level = query_tree_block_level(fs_info, bytenr);
10717         if (level < 0) {
10718                 err |= REFERENCER_MISSING;
10719                 goto out;
10720         }
10721
10722         key.objectid = root_id;
10723         key.type = BTRFS_ROOT_ITEM_KEY;
10724         key.offset = (u64)-1;
10725
10726         root = btrfs_read_fs_root(fs_info, &key);
10727         if (IS_ERR(root)) {
10728                 err |= REFERENCER_MISSING;
10729                 goto out;
10730         }
10731
10732         /* Read out the tree block to get item/node key */
10733         eb = read_tree_block(fs_info, bytenr, 0);
10734         if (!extent_buffer_uptodate(eb)) {
10735                 err |= REFERENCER_MISSING;
10736                 free_extent_buffer(eb);
10737                 goto out;
10738         }
10739
10740         /* Empty tree, no need to check key */
10741         if (!btrfs_header_nritems(eb) && !level) {
10742                 free_extent_buffer(eb);
10743                 goto out;
10744         }
10745
10746         if (level)
10747                 btrfs_node_key_to_cpu(eb, &key, 0);
10748         else
10749                 btrfs_item_key_to_cpu(eb, &key, 0);
10750
10751         free_extent_buffer(eb);
10752
10753         btrfs_init_path(&path);
10754         path.lowest_level = level;
10755         /* Search with the first key, to ensure we can reach it */
10756         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10757         if (ret < 0) {
10758                 err |= REFERENCER_MISSING;
10759                 goto release_out;
10760         }
10761
10762         node = path.nodes[level];
10763         if (btrfs_header_bytenr(node) != bytenr) {
10764                 error(
10765         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10766                         bytenr, nodesize, bytenr,
10767                         btrfs_header_bytenr(node));
10768                 err |= REFERENCER_MISMATCH;
10769         }
10770         if (btrfs_header_level(node) != level) {
10771                 error(
10772         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10773                         bytenr, nodesize, level,
10774                         btrfs_header_level(node));
10775                 err |= REFERENCER_MISMATCH;
10776         }
10777
10778 release_out:
10779         btrfs_release_path(&path);
10780 out:
10781         if (err & REFERENCER_MISSING) {
10782                 if (level < 0)
10783                         error("extent [%llu %d] lost referencer (owner: %llu)",
10784                                 bytenr, nodesize, root_id);
10785                 else
10786                         error(
10787                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10788                                 bytenr, nodesize, root_id, level);
10789         }
10790
10791         return err;
10792 }
10793
10794 /*
10795  * Check if tree block @eb is tree reloc root.
10796  * Return 0 if it's not or any problem happens
10797  * Return 1 if it's a tree reloc root
10798  */
10799 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10800                                  struct extent_buffer *eb)
10801 {
10802         struct btrfs_root *tree_reloc_root;
10803         struct btrfs_key key;
10804         u64 bytenr = btrfs_header_bytenr(eb);
10805         u64 owner = btrfs_header_owner(eb);
10806         int ret = 0;
10807
10808         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10809         key.offset = owner;
10810         key.type = BTRFS_ROOT_ITEM_KEY;
10811
10812         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10813         if (IS_ERR(tree_reloc_root))
10814                 return 0;
10815
10816         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10817                 ret = 1;
10818         btrfs_free_fs_root(tree_reloc_root);
10819         return ret;
10820 }
10821
10822 /*
10823  * Check referencer for shared block backref
10824  * If level == -1, this function will resolve the level.
10825  */
10826 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10827                                      u64 parent, u64 bytenr, int level)
10828 {
10829         struct extent_buffer *eb;
10830         u32 nr;
10831         int found_parent = 0;
10832         int i;
10833
10834         eb = read_tree_block(fs_info, parent, 0);
10835         if (!extent_buffer_uptodate(eb))
10836                 goto out;
10837
10838         if (level == -1)
10839                 level = query_tree_block_level(fs_info, bytenr);
10840         if (level < 0)
10841                 goto out;
10842
10843         /* It's possible it's a tree reloc root */
10844         if (parent == bytenr) {
10845                 if (is_tree_reloc_root(fs_info, eb))
10846                         found_parent = 1;
10847                 goto out;
10848         }
10849
10850         if (level + 1 != btrfs_header_level(eb))
10851                 goto out;
10852
10853         nr = btrfs_header_nritems(eb);
10854         for (i = 0; i < nr; i++) {
10855                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10856                         found_parent = 1;
10857                         break;
10858                 }
10859         }
10860 out:
10861         free_extent_buffer(eb);
10862         if (!found_parent) {
10863                 error(
10864         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10865                         bytenr, fs_info->nodesize, parent, level);
10866                 return REFERENCER_MISSING;
10867         }
10868         return 0;
10869 }
10870
10871 /*
10872  * Check referencer for normal (inlined) data ref
10873  * If len == 0, it will be resolved by searching in extent tree
10874  */
10875 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10876                                      u64 root_id, u64 objectid, u64 offset,
10877                                      u64 bytenr, u64 len, u32 count)
10878 {
10879         struct btrfs_root *root;
10880         struct btrfs_root *extent_root = fs_info->extent_root;
10881         struct btrfs_key key;
10882         struct btrfs_path path;
10883         struct extent_buffer *leaf;
10884         struct btrfs_file_extent_item *fi;
10885         u32 found_count = 0;
10886         int slot;
10887         int ret = 0;
10888
10889         if (!len) {
10890                 key.objectid = bytenr;
10891                 key.type = BTRFS_EXTENT_ITEM_KEY;
10892                 key.offset = (u64)-1;
10893
10894                 btrfs_init_path(&path);
10895                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10896                 if (ret < 0)
10897                         goto out;
10898                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10899                 if (ret)
10900                         goto out;
10901                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10902                 if (key.objectid != bytenr ||
10903                     key.type != BTRFS_EXTENT_ITEM_KEY)
10904                         goto out;
10905                 len = key.offset;
10906                 btrfs_release_path(&path);
10907         }
10908         key.objectid = root_id;
10909         key.type = BTRFS_ROOT_ITEM_KEY;
10910         key.offset = (u64)-1;
10911         btrfs_init_path(&path);
10912
10913         root = btrfs_read_fs_root(fs_info, &key);
10914         if (IS_ERR(root))
10915                 goto out;
10916
10917         key.objectid = objectid;
10918         key.type = BTRFS_EXTENT_DATA_KEY;
10919         /*
10920          * It can be nasty as data backref offset is
10921          * file offset - file extent offset, which is smaller or
10922          * equal to original backref offset.  The only special case is
10923          * overflow.  So we need to special check and do further search.
10924          */
10925         key.offset = offset & (1ULL << 63) ? 0 : offset;
10926
10927         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10928         if (ret < 0)
10929                 goto out;
10930
10931         /*
10932          * Search afterwards to get correct one
10933          * NOTE: As we must do a comprehensive check on the data backref to
10934          * make sure the dref count also matches, we must iterate all file
10935          * extents for that inode.
10936          */
10937         while (1) {
10938                 leaf = path.nodes[0];
10939                 slot = path.slots[0];
10940
10941                 if (slot >= btrfs_header_nritems(leaf))
10942                         goto next;
10943                 btrfs_item_key_to_cpu(leaf, &key, slot);
10944                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10945                         break;
10946                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10947                 /*
10948                  * Except normal disk bytenr and disk num bytes, we still
10949                  * need to do extra check on dbackref offset as
10950                  * dbackref offset = file_offset - file_extent_offset
10951                  */
10952                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10953                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10954                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10955                     offset)
10956                         found_count++;
10957
10958 next:
10959                 ret = btrfs_next_item(root, &path);
10960                 if (ret)
10961                         break;
10962         }
10963 out:
10964         btrfs_release_path(&path);
10965         if (found_count != count) {
10966                 error(
10967 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10968                         bytenr, len, root_id, objectid, offset, count, found_count);
10969                 return REFERENCER_MISSING;
10970         }
10971         return 0;
10972 }
10973
10974 /*
10975  * Check if the referencer of a shared data backref exists
10976  */
10977 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10978                                      u64 parent, u64 bytenr)
10979 {
10980         struct extent_buffer *eb;
10981         struct btrfs_key key;
10982         struct btrfs_file_extent_item *fi;
10983         u32 nr;
10984         int found_parent = 0;
10985         int i;
10986
10987         eb = read_tree_block(fs_info, parent, 0);
10988         if (!extent_buffer_uptodate(eb))
10989                 goto out;
10990
10991         nr = btrfs_header_nritems(eb);
10992         for (i = 0; i < nr; i++) {
10993                 btrfs_item_key_to_cpu(eb, &key, i);
10994                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10995                         continue;
10996
10997                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10998                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10999                         continue;
11000
11001                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11002                         found_parent = 1;
11003                         break;
11004                 }
11005         }
11006
11007 out:
11008         free_extent_buffer(eb);
11009         if (!found_parent) {
11010                 error("shared extent %llu referencer lost (parent: %llu)",
11011                         bytenr, parent);
11012                 return REFERENCER_MISSING;
11013         }
11014         return 0;
11015 }
11016
11017 /*
11018  * This function will check a given extent item, including its backref and
11019  * itself (like crossing stripe boundary and type)
11020  *
11021  * Since we don't use extent_record anymore, introduce new error bit
11022  */
11023 static int check_extent_item(struct btrfs_fs_info *fs_info,
11024                              struct extent_buffer *eb, int slot)
11025 {
11026         struct btrfs_extent_item *ei;
11027         struct btrfs_extent_inline_ref *iref;
11028         struct btrfs_extent_data_ref *dref;
11029         unsigned long end;
11030         unsigned long ptr;
11031         int type;
11032         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11033         u32 item_size = btrfs_item_size_nr(eb, slot);
11034         u64 flags;
11035         u64 offset;
11036         int metadata = 0;
11037         int level;
11038         struct btrfs_key key;
11039         int ret;
11040         int err = 0;
11041
11042         btrfs_item_key_to_cpu(eb, &key, slot);
11043         if (key.type == BTRFS_EXTENT_ITEM_KEY)
11044                 bytes_used += key.offset;
11045         else
11046                 bytes_used += nodesize;
11047
11048         if (item_size < sizeof(*ei)) {
11049                 /*
11050                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11051                  * old thing when on disk format is still un-determined.
11052                  * No need to care about it anymore
11053                  */
11054                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11055                 return -ENOTTY;
11056         }
11057
11058         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11059         flags = btrfs_extent_flags(eb, ei);
11060
11061         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11062                 metadata = 1;
11063         if (metadata && check_crossing_stripes(global_info, key.objectid,
11064                                                eb->len)) {
11065                 error("bad metadata [%llu, %llu) crossing stripe boundary",
11066                       key.objectid, key.objectid + nodesize);
11067                 err |= CROSSING_STRIPE_BOUNDARY;
11068         }
11069
11070         ptr = (unsigned long)(ei + 1);
11071
11072         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11073                 /* Old EXTENT_ITEM metadata */
11074                 struct btrfs_tree_block_info *info;
11075
11076                 info = (struct btrfs_tree_block_info *)ptr;
11077                 level = btrfs_tree_block_level(eb, info);
11078                 ptr += sizeof(struct btrfs_tree_block_info);
11079         } else {
11080                 /* New METADATA_ITEM */
11081                 level = key.offset;
11082         }
11083         end = (unsigned long)ei + item_size;
11084
11085 next:
11086         /* Reached extent item end normally */
11087         if (ptr == end)
11088                 goto out;
11089
11090         /* Beyond extent item end, wrong item size */
11091         if (ptr > end) {
11092                 err |= ITEM_SIZE_MISMATCH;
11093                 error("extent item at bytenr %llu slot %d has wrong size",
11094                         eb->start, slot);
11095                 goto out;
11096         }
11097
11098         /* Now check every backref in this extent item */
11099         iref = (struct btrfs_extent_inline_ref *)ptr;
11100         type = btrfs_extent_inline_ref_type(eb, iref);
11101         offset = btrfs_extent_inline_ref_offset(eb, iref);
11102         switch (type) {
11103         case BTRFS_TREE_BLOCK_REF_KEY:
11104                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11105                                                level);
11106                 err |= ret;
11107                 break;
11108         case BTRFS_SHARED_BLOCK_REF_KEY:
11109                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11110                                                  level);
11111                 err |= ret;
11112                 break;
11113         case BTRFS_EXTENT_DATA_REF_KEY:
11114                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11115                 ret = check_extent_data_backref(fs_info,
11116                                 btrfs_extent_data_ref_root(eb, dref),
11117                                 btrfs_extent_data_ref_objectid(eb, dref),
11118                                 btrfs_extent_data_ref_offset(eb, dref),
11119                                 key.objectid, key.offset,
11120                                 btrfs_extent_data_ref_count(eb, dref));
11121                 err |= ret;
11122                 break;
11123         case BTRFS_SHARED_DATA_REF_KEY:
11124                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11125                 err |= ret;
11126                 break;
11127         default:
11128                 error("extent[%llu %d %llu] has unknown ref type: %d",
11129                         key.objectid, key.type, key.offset, type);
11130                 err |= UNKNOWN_TYPE;
11131                 goto out;
11132         }
11133
11134         ptr += btrfs_extent_inline_ref_size(type);
11135         goto next;
11136
11137 out:
11138         return err;
11139 }
11140
11141 /*
11142  * Check if a dev extent item is referred correctly by its chunk
11143  */
11144 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11145                                  struct extent_buffer *eb, int slot)
11146 {
11147         struct btrfs_root *chunk_root = fs_info->chunk_root;
11148         struct btrfs_dev_extent *ptr;
11149         struct btrfs_path path;
11150         struct btrfs_key chunk_key;
11151         struct btrfs_key devext_key;
11152         struct btrfs_chunk *chunk;
11153         struct extent_buffer *l;
11154         int num_stripes;
11155         u64 length;
11156         int i;
11157         int found_chunk = 0;
11158         int ret;
11159
11160         btrfs_item_key_to_cpu(eb, &devext_key, slot);
11161         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11162         length = btrfs_dev_extent_length(eb, ptr);
11163
11164         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11165         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11166         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11167
11168         btrfs_init_path(&path);
11169         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11170         if (ret)
11171                 goto out;
11172
11173         l = path.nodes[0];
11174         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11175         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11176                                       chunk_key.offset);
11177         if (ret < 0)
11178                 goto out;
11179
11180         if (btrfs_stripe_length(fs_info, l, chunk) != length)
11181                 goto out;
11182
11183         num_stripes = btrfs_chunk_num_stripes(l, chunk);
11184         for (i = 0; i < num_stripes; i++) {
11185                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11186                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11187
11188                 if (devid == devext_key.objectid &&
11189                     offset == devext_key.offset) {
11190                         found_chunk = 1;
11191                         break;
11192                 }
11193         }
11194 out:
11195         btrfs_release_path(&path);
11196         if (!found_chunk) {
11197                 error(
11198                 "device extent[%llu, %llu, %llu] did not find the related chunk",
11199                         devext_key.objectid, devext_key.offset, length);
11200                 return REFERENCER_MISSING;
11201         }
11202         return 0;
11203 }
11204
11205 /*
11206  * Check if the used space is correct with the dev item
11207  */
11208 static int check_dev_item(struct btrfs_fs_info *fs_info,
11209                           struct extent_buffer *eb, int slot)
11210 {
11211         struct btrfs_root *dev_root = fs_info->dev_root;
11212         struct btrfs_dev_item *dev_item;
11213         struct btrfs_path path;
11214         struct btrfs_key key;
11215         struct btrfs_dev_extent *ptr;
11216         u64 dev_id;
11217         u64 used;
11218         u64 total = 0;
11219         int ret;
11220
11221         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11222         dev_id = btrfs_device_id(eb, dev_item);
11223         used = btrfs_device_bytes_used(eb, dev_item);
11224
11225         key.objectid = dev_id;
11226         key.type = BTRFS_DEV_EXTENT_KEY;
11227         key.offset = 0;
11228
11229         btrfs_init_path(&path);
11230         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11231         if (ret < 0) {
11232                 btrfs_item_key_to_cpu(eb, &key, slot);
11233                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11234                         key.objectid, key.type, key.offset);
11235                 btrfs_release_path(&path);
11236                 return REFERENCER_MISSING;
11237         }
11238
11239         /* Iterate dev_extents to calculate the used space of a device */
11240         while (1) {
11241                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11242                         goto next;
11243
11244                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11245                 if (key.objectid > dev_id)
11246                         break;
11247                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11248                         goto next;
11249
11250                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11251                                      struct btrfs_dev_extent);
11252                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11253 next:
11254                 ret = btrfs_next_item(dev_root, &path);
11255                 if (ret)
11256                         break;
11257         }
11258         btrfs_release_path(&path);
11259
11260         if (used != total) {
11261                 btrfs_item_key_to_cpu(eb, &key, slot);
11262                 error(
11263 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11264                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11265                         BTRFS_DEV_EXTENT_KEY, dev_id);
11266                 return ACCOUNTING_MISMATCH;
11267         }
11268         return 0;
11269 }
11270
11271 /*
11272  * Check a block group item with its referener (chunk) and its used space
11273  * with extent/metadata item
11274  */
11275 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11276                                   struct extent_buffer *eb, int slot)
11277 {
11278         struct btrfs_root *extent_root = fs_info->extent_root;
11279         struct btrfs_root *chunk_root = fs_info->chunk_root;
11280         struct btrfs_block_group_item *bi;
11281         struct btrfs_block_group_item bg_item;
11282         struct btrfs_path path;
11283         struct btrfs_key bg_key;
11284         struct btrfs_key chunk_key;
11285         struct btrfs_key extent_key;
11286         struct btrfs_chunk *chunk;
11287         struct extent_buffer *leaf;
11288         struct btrfs_extent_item *ei;
11289         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11290         u64 flags;
11291         u64 bg_flags;
11292         u64 used;
11293         u64 total = 0;
11294         int ret;
11295         int err = 0;
11296
11297         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11298         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11299         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11300         used = btrfs_block_group_used(&bg_item);
11301         bg_flags = btrfs_block_group_flags(&bg_item);
11302
11303         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11304         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11305         chunk_key.offset = bg_key.objectid;
11306
11307         btrfs_init_path(&path);
11308         /* Search for the referencer chunk */
11309         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11310         if (ret) {
11311                 error(
11312                 "block group[%llu %llu] did not find the related chunk item",
11313                         bg_key.objectid, bg_key.offset);
11314                 err |= REFERENCER_MISSING;
11315         } else {
11316                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11317                                         struct btrfs_chunk);
11318                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11319                                                 bg_key.offset) {
11320                         error(
11321         "block group[%llu %llu] related chunk item length does not match",
11322                                 bg_key.objectid, bg_key.offset);
11323                         err |= REFERENCER_MISMATCH;
11324                 }
11325         }
11326         btrfs_release_path(&path);
11327
11328         /* Search from the block group bytenr */
11329         extent_key.objectid = bg_key.objectid;
11330         extent_key.type = 0;
11331         extent_key.offset = 0;
11332
11333         btrfs_init_path(&path);
11334         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11335         if (ret < 0)
11336                 goto out;
11337
11338         /* Iterate extent tree to account used space */
11339         while (1) {
11340                 leaf = path.nodes[0];
11341
11342                 /* Search slot can point to the last item beyond leaf nritems */
11343                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11344                         goto next;
11345
11346                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11347                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11348                         break;
11349
11350                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11351                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11352                         goto next;
11353                 if (extent_key.objectid < bg_key.objectid)
11354                         goto next;
11355
11356                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11357                         total += nodesize;
11358                 else
11359                         total += extent_key.offset;
11360
11361                 ei = btrfs_item_ptr(leaf, path.slots[0],
11362                                     struct btrfs_extent_item);
11363                 flags = btrfs_extent_flags(leaf, ei);
11364                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11365                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11366                                 error(
11367                         "bad extent[%llu, %llu) type mismatch with chunk",
11368                                         extent_key.objectid,
11369                                         extent_key.objectid + extent_key.offset);
11370                                 err |= CHUNK_TYPE_MISMATCH;
11371                         }
11372                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11373                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11374                                     BTRFS_BLOCK_GROUP_METADATA))) {
11375                                 error(
11376                         "bad extent[%llu, %llu) type mismatch with chunk",
11377                                         extent_key.objectid,
11378                                         extent_key.objectid + nodesize);
11379                                 err |= CHUNK_TYPE_MISMATCH;
11380                         }
11381                 }
11382 next:
11383                 ret = btrfs_next_item(extent_root, &path);
11384                 if (ret)
11385                         break;
11386         }
11387
11388 out:
11389         btrfs_release_path(&path);
11390
11391         if (total != used) {
11392                 error(
11393                 "block group[%llu %llu] used %llu but extent items used %llu",
11394                         bg_key.objectid, bg_key.offset, used, total);
11395                 err |= ACCOUNTING_MISMATCH;
11396         }
11397         return err;
11398 }
11399
11400 /*
11401  * Check a chunk item.
11402  * Including checking all referred dev_extents and block group
11403  */
11404 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11405                             struct extent_buffer *eb, int slot)
11406 {
11407         struct btrfs_root *extent_root = fs_info->extent_root;
11408         struct btrfs_root *dev_root = fs_info->dev_root;
11409         struct btrfs_path path;
11410         struct btrfs_key chunk_key;
11411         struct btrfs_key bg_key;
11412         struct btrfs_key devext_key;
11413         struct btrfs_chunk *chunk;
11414         struct extent_buffer *leaf;
11415         struct btrfs_block_group_item *bi;
11416         struct btrfs_block_group_item bg_item;
11417         struct btrfs_dev_extent *ptr;
11418         u64 length;
11419         u64 chunk_end;
11420         u64 stripe_len;
11421         u64 type;
11422         int num_stripes;
11423         u64 offset;
11424         u64 objectid;
11425         int i;
11426         int ret;
11427         int err = 0;
11428
11429         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11430         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11431         length = btrfs_chunk_length(eb, chunk);
11432         chunk_end = chunk_key.offset + length;
11433         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11434                                       chunk_key.offset);
11435         if (ret < 0) {
11436                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11437                         chunk_end);
11438                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11439                 goto out;
11440         }
11441         type = btrfs_chunk_type(eb, chunk);
11442
11443         bg_key.objectid = chunk_key.offset;
11444         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11445         bg_key.offset = length;
11446
11447         btrfs_init_path(&path);
11448         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11449         if (ret) {
11450                 error(
11451                 "chunk[%llu %llu) did not find the related block group item",
11452                         chunk_key.offset, chunk_end);
11453                 err |= REFERENCER_MISSING;
11454         } else{
11455                 leaf = path.nodes[0];
11456                 bi = btrfs_item_ptr(leaf, path.slots[0],
11457                                     struct btrfs_block_group_item);
11458                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11459                                    sizeof(bg_item));
11460                 if (btrfs_block_group_flags(&bg_item) != type) {
11461                         error(
11462 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11463                                 chunk_key.offset, chunk_end, type,
11464                                 btrfs_block_group_flags(&bg_item));
11465                         err |= REFERENCER_MISSING;
11466                 }
11467         }
11468
11469         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11470         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11471         for (i = 0; i < num_stripes; i++) {
11472                 btrfs_release_path(&path);
11473                 btrfs_init_path(&path);
11474                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11475                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11476                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11477
11478                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11479                                         0, 0);
11480                 if (ret)
11481                         goto not_match_dev;
11482
11483                 leaf = path.nodes[0];
11484                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11485                                      struct btrfs_dev_extent);
11486                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11487                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11488                 if (objectid != chunk_key.objectid ||
11489                     offset != chunk_key.offset ||
11490                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11491                         goto not_match_dev;
11492                 continue;
11493 not_match_dev:
11494                 err |= BACKREF_MISSING;
11495                 error(
11496                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11497                         chunk_key.objectid, chunk_end, i);
11498                 continue;
11499         }
11500         btrfs_release_path(&path);
11501 out:
11502         return err;
11503 }
11504
11505 /*
11506  * Main entry function to check known items and update related accounting info
11507  */
11508 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11509 {
11510         struct btrfs_fs_info *fs_info = root->fs_info;
11511         struct btrfs_key key;
11512         int slot = 0;
11513         int type;
11514         struct btrfs_extent_data_ref *dref;
11515         int ret;
11516         int err = 0;
11517
11518 next:
11519         btrfs_item_key_to_cpu(eb, &key, slot);
11520         type = key.type;
11521
11522         switch (type) {
11523         case BTRFS_EXTENT_DATA_KEY:
11524                 ret = check_extent_data_item(root, eb, slot);
11525                 err |= ret;
11526                 break;
11527         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11528                 ret = check_block_group_item(fs_info, eb, slot);
11529                 err |= ret;
11530                 break;
11531         case BTRFS_DEV_ITEM_KEY:
11532                 ret = check_dev_item(fs_info, eb, slot);
11533                 err |= ret;
11534                 break;
11535         case BTRFS_CHUNK_ITEM_KEY:
11536                 ret = check_chunk_item(fs_info, eb, slot);
11537                 err |= ret;
11538                 break;
11539         case BTRFS_DEV_EXTENT_KEY:
11540                 ret = check_dev_extent_item(fs_info, eb, slot);
11541                 err |= ret;
11542                 break;
11543         case BTRFS_EXTENT_ITEM_KEY:
11544         case BTRFS_METADATA_ITEM_KEY:
11545                 ret = check_extent_item(fs_info, eb, slot);
11546                 err |= ret;
11547                 break;
11548         case BTRFS_EXTENT_CSUM_KEY:
11549                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11550                 break;
11551         case BTRFS_TREE_BLOCK_REF_KEY:
11552                 ret = check_tree_block_backref(fs_info, key.offset,
11553                                                key.objectid, -1);
11554                 err |= ret;
11555                 break;
11556         case BTRFS_EXTENT_DATA_REF_KEY:
11557                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11558                 ret = check_extent_data_backref(fs_info,
11559                                 btrfs_extent_data_ref_root(eb, dref),
11560                                 btrfs_extent_data_ref_objectid(eb, dref),
11561                                 btrfs_extent_data_ref_offset(eb, dref),
11562                                 key.objectid, 0,
11563                                 btrfs_extent_data_ref_count(eb, dref));
11564                 err |= ret;
11565                 break;
11566         case BTRFS_SHARED_BLOCK_REF_KEY:
11567                 ret = check_shared_block_backref(fs_info, key.offset,
11568                                                  key.objectid, -1);
11569                 err |= ret;
11570                 break;
11571         case BTRFS_SHARED_DATA_REF_KEY:
11572                 ret = check_shared_data_backref(fs_info, key.offset,
11573                                                 key.objectid);
11574                 err |= ret;
11575                 break;
11576         default:
11577                 break;
11578         }
11579
11580         if (++slot < btrfs_header_nritems(eb))
11581                 goto next;
11582
11583         return err;
11584 }
11585
11586 /*
11587  * Helper function for later fs/subvol tree check.  To determine if a tree
11588  * block should be checked.
11589  * This function will ensure only the direct referencer with lowest rootid to
11590  * check a fs/subvolume tree block.
11591  *
11592  * Backref check at extent tree would detect errors like missing subvolume
11593  * tree, so we can do aggressive check to reduce duplicated checks.
11594  */
11595 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11596 {
11597         struct btrfs_root *extent_root = root->fs_info->extent_root;
11598         struct btrfs_key key;
11599         struct btrfs_path path;
11600         struct extent_buffer *leaf;
11601         int slot;
11602         struct btrfs_extent_item *ei;
11603         unsigned long ptr;
11604         unsigned long end;
11605         int type;
11606         u32 item_size;
11607         u64 offset;
11608         struct btrfs_extent_inline_ref *iref;
11609         int ret;
11610
11611         btrfs_init_path(&path);
11612         key.objectid = btrfs_header_bytenr(eb);
11613         key.type = BTRFS_METADATA_ITEM_KEY;
11614         key.offset = (u64)-1;
11615
11616         /*
11617          * Any failure in backref resolving means we can't determine
11618          * whom the tree block belongs to.
11619          * So in that case, we need to check that tree block
11620          */
11621         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11622         if (ret < 0)
11623                 goto need_check;
11624
11625         ret = btrfs_previous_extent_item(extent_root, &path,
11626                                          btrfs_header_bytenr(eb));
11627         if (ret)
11628                 goto need_check;
11629
11630         leaf = path.nodes[0];
11631         slot = path.slots[0];
11632         btrfs_item_key_to_cpu(leaf, &key, slot);
11633         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11634
11635         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11636                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11637         } else {
11638                 struct btrfs_tree_block_info *info;
11639
11640                 info = (struct btrfs_tree_block_info *)(ei + 1);
11641                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11642         }
11643
11644         item_size = btrfs_item_size_nr(leaf, slot);
11645         ptr = (unsigned long)iref;
11646         end = (unsigned long)ei + item_size;
11647         while (ptr < end) {
11648                 iref = (struct btrfs_extent_inline_ref *)ptr;
11649                 type = btrfs_extent_inline_ref_type(leaf, iref);
11650                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11651
11652                 /*
11653                  * We only check the tree block if current root is
11654                  * the lowest referencer of it.
11655                  */
11656                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11657                     offset < root->objectid) {
11658                         btrfs_release_path(&path);
11659                         return 0;
11660                 }
11661
11662                 ptr += btrfs_extent_inline_ref_size(type);
11663         }
11664         /*
11665          * Normally we should also check keyed tree block ref, but that may be
11666          * very time consuming.  Inlined ref should already make us skip a lot
11667          * of refs now.  So skip search keyed tree block ref.
11668          */
11669
11670 need_check:
11671         btrfs_release_path(&path);
11672         return 1;
11673 }
11674
11675 /*
11676  * Traversal function for tree block. We will do:
11677  * 1) Skip shared fs/subvolume tree blocks
11678  * 2) Update related bytes accounting
11679  * 3) Pre-order traversal
11680  */
11681 static int traverse_tree_block(struct btrfs_root *root,
11682                                 struct extent_buffer *node)
11683 {
11684         struct extent_buffer *eb;
11685         struct btrfs_key key;
11686         struct btrfs_key drop_key;
11687         int level;
11688         u64 nr;
11689         int i;
11690         int err = 0;
11691         int ret;
11692
11693         /*
11694          * Skip shared fs/subvolume tree block, in that case they will
11695          * be checked by referencer with lowest rootid
11696          */
11697         if (is_fstree(root->objectid) && !should_check(root, node))
11698                 return 0;
11699
11700         /* Update bytes accounting */
11701         total_btree_bytes += node->len;
11702         if (fs_root_objectid(btrfs_header_owner(node)))
11703                 total_fs_tree_bytes += node->len;
11704         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11705                 total_extent_tree_bytes += node->len;
11706
11707         /* pre-order tranversal, check itself first */
11708         level = btrfs_header_level(node);
11709         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11710                                    btrfs_header_level(node),
11711                                    btrfs_header_owner(node));
11712         err |= ret;
11713         if (err)
11714                 error(
11715         "check %s failed root %llu bytenr %llu level %d, force continue check",
11716                         level ? "node":"leaf", root->objectid,
11717                         btrfs_header_bytenr(node), btrfs_header_level(node));
11718
11719         if (!level) {
11720                 btree_space_waste += btrfs_leaf_free_space(root, node);
11721                 ret = check_leaf_items(root, node);
11722                 err |= ret;
11723                 return err;
11724         }
11725
11726         nr = btrfs_header_nritems(node);
11727         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11728         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11729                 sizeof(struct btrfs_key_ptr);
11730
11731         /* Then check all its children */
11732         for (i = 0; i < nr; i++) {
11733                 u64 blocknr = btrfs_node_blockptr(node, i);
11734
11735                 btrfs_node_key_to_cpu(node, &key, i);
11736                 if (level == root->root_item.drop_level &&
11737                     is_dropped_key(&key, &drop_key))
11738                         continue;
11739
11740                 /*
11741                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11742                  * to call the function itself.
11743                  */
11744                 eb = read_tree_block(root->fs_info, blocknr, 0);
11745                 if (extent_buffer_uptodate(eb)) {
11746                         ret = traverse_tree_block(root, eb);
11747                         err |= ret;
11748                 }
11749                 free_extent_buffer(eb);
11750         }
11751
11752         return err;
11753 }
11754
11755 /*
11756  * Low memory usage version check_chunks_and_extents.
11757  */
11758 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11759 {
11760         struct btrfs_path path;
11761         struct btrfs_key key;
11762         struct btrfs_root *root1;
11763         struct btrfs_root *root;
11764         struct btrfs_root *cur_root;
11765         int err = 0;
11766         int ret;
11767
11768         root = fs_info->fs_root;
11769
11770         root1 = root->fs_info->chunk_root;
11771         ret = traverse_tree_block(root1, root1->node);
11772         err |= ret;
11773
11774         root1 = root->fs_info->tree_root;
11775         ret = traverse_tree_block(root1, root1->node);
11776         err |= ret;
11777
11778         btrfs_init_path(&path);
11779         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11780         key.offset = 0;
11781         key.type = BTRFS_ROOT_ITEM_KEY;
11782
11783         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11784         if (ret) {
11785                 error("cannot find extent treet in tree_root");
11786                 goto out;
11787         }
11788
11789         while (1) {
11790                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11791                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11792                         goto next;
11793                 key.offset = (u64)-1;
11794
11795                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11796                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11797                                         &key);
11798                 else
11799                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11800                 if (IS_ERR(cur_root) || !cur_root) {
11801                         error("failed to read tree: %lld", key.objectid);
11802                         goto next;
11803                 }
11804
11805                 ret = traverse_tree_block(cur_root, cur_root->node);
11806                 err |= ret;
11807
11808                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11809                         btrfs_free_fs_root(cur_root);
11810 next:
11811                 ret = btrfs_next_item(root1, &path);
11812                 if (ret)
11813                         goto out;
11814         }
11815
11816 out:
11817         btrfs_release_path(&path);
11818         return err;
11819 }
11820
11821 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11822 {
11823         int ret;
11824
11825         if (!ctx.progress_enabled)
11826                 fprintf(stderr, "checking extents\n");
11827         if (check_mode == CHECK_MODE_LOWMEM)
11828                 ret = check_chunks_and_extents_v2(fs_info);
11829         else
11830                 ret = check_chunks_and_extents(fs_info);
11831
11832         return ret;
11833 }
11834
11835 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11836                            struct btrfs_root *root, int overwrite)
11837 {
11838         struct extent_buffer *c;
11839         struct extent_buffer *old = root->node;
11840         int level;
11841         int ret;
11842         struct btrfs_disk_key disk_key = {0,0,0};
11843
11844         level = 0;
11845
11846         if (overwrite) {
11847                 c = old;
11848                 extent_buffer_get(c);
11849                 goto init;
11850         }
11851         c = btrfs_alloc_free_block(trans, root,
11852                                    root->fs_info->nodesize,
11853                                    root->root_key.objectid,
11854                                    &disk_key, level, 0, 0);
11855         if (IS_ERR(c)) {
11856                 c = old;
11857                 extent_buffer_get(c);
11858                 overwrite = 1;
11859         }
11860 init:
11861         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11862         btrfs_set_header_level(c, level);
11863         btrfs_set_header_bytenr(c, c->start);
11864         btrfs_set_header_generation(c, trans->transid);
11865         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11866         btrfs_set_header_owner(c, root->root_key.objectid);
11867
11868         write_extent_buffer(c, root->fs_info->fsid,
11869                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11870
11871         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11872                             btrfs_header_chunk_tree_uuid(c),
11873                             BTRFS_UUID_SIZE);
11874
11875         btrfs_mark_buffer_dirty(c);
11876         /*
11877          * this case can happen in the following case:
11878          *
11879          * 1.overwrite previous root.
11880          *
11881          * 2.reinit reloc data root, this is because we skip pin
11882          * down reloc data tree before which means we can allocate
11883          * same block bytenr here.
11884          */
11885         if (old->start == c->start) {
11886                 btrfs_set_root_generation(&root->root_item,
11887                                           trans->transid);
11888                 root->root_item.level = btrfs_header_level(root->node);
11889                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11890                                         &root->root_key, &root->root_item);
11891                 if (ret) {
11892                         free_extent_buffer(c);
11893                         return ret;
11894                 }
11895         }
11896         free_extent_buffer(old);
11897         root->node = c;
11898         add_root_to_dirty_list(root);
11899         return 0;
11900 }
11901
11902 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11903                                 struct extent_buffer *eb, int tree_root)
11904 {
11905         struct extent_buffer *tmp;
11906         struct btrfs_root_item *ri;
11907         struct btrfs_key key;
11908         u64 bytenr;
11909         int level = btrfs_header_level(eb);
11910         int nritems;
11911         int ret;
11912         int i;
11913
11914         /*
11915          * If we have pinned this block before, don't pin it again.
11916          * This can not only avoid forever loop with broken filesystem
11917          * but also give us some speedups.
11918          */
11919         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11920                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11921                 return 0;
11922
11923         btrfs_pin_extent(fs_info, eb->start, eb->len);
11924
11925         nritems = btrfs_header_nritems(eb);
11926         for (i = 0; i < nritems; i++) {
11927                 if (level == 0) {
11928                         btrfs_item_key_to_cpu(eb, &key, i);
11929                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11930                                 continue;
11931                         /* Skip the extent root and reloc roots */
11932                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11933                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11934                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11935                                 continue;
11936                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11937                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11938
11939                         /*
11940                          * If at any point we start needing the real root we
11941                          * will have to build a stump root for the root we are
11942                          * in, but for now this doesn't actually use the root so
11943                          * just pass in extent_root.
11944                          */
11945                         tmp = read_tree_block(fs_info, bytenr, 0);
11946                         if (!extent_buffer_uptodate(tmp)) {
11947                                 fprintf(stderr, "Error reading root block\n");
11948                                 return -EIO;
11949                         }
11950                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11951                         free_extent_buffer(tmp);
11952                         if (ret)
11953                                 return ret;
11954                 } else {
11955                         bytenr = btrfs_node_blockptr(eb, i);
11956
11957                         /* If we aren't the tree root don't read the block */
11958                         if (level == 1 && !tree_root) {
11959                                 btrfs_pin_extent(fs_info, bytenr,
11960                                                 fs_info->nodesize);
11961                                 continue;
11962                         }
11963
11964                         tmp = read_tree_block(fs_info, bytenr, 0);
11965                         if (!extent_buffer_uptodate(tmp)) {
11966                                 fprintf(stderr, "Error reading tree block\n");
11967                                 return -EIO;
11968                         }
11969                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11970                         free_extent_buffer(tmp);
11971                         if (ret)
11972                                 return ret;
11973                 }
11974         }
11975
11976         return 0;
11977 }
11978
11979 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11980 {
11981         int ret;
11982
11983         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11984         if (ret)
11985                 return ret;
11986
11987         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11988 }
11989
11990 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11991 {
11992         struct btrfs_block_group_cache *cache;
11993         struct btrfs_path path;
11994         struct extent_buffer *leaf;
11995         struct btrfs_chunk *chunk;
11996         struct btrfs_key key;
11997         int ret;
11998         u64 start;
11999
12000         btrfs_init_path(&path);
12001         key.objectid = 0;
12002         key.type = BTRFS_CHUNK_ITEM_KEY;
12003         key.offset = 0;
12004         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12005         if (ret < 0) {
12006                 btrfs_release_path(&path);
12007                 return ret;
12008         }
12009
12010         /*
12011          * We do this in case the block groups were screwed up and had alloc
12012          * bits that aren't actually set on the chunks.  This happens with
12013          * restored images every time and could happen in real life I guess.
12014          */
12015         fs_info->avail_data_alloc_bits = 0;
12016         fs_info->avail_metadata_alloc_bits = 0;
12017         fs_info->avail_system_alloc_bits = 0;
12018
12019         /* First we need to create the in-memory block groups */
12020         while (1) {
12021                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12022                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12023                         if (ret < 0) {
12024                                 btrfs_release_path(&path);
12025                                 return ret;
12026                         }
12027                         if (ret) {
12028                                 ret = 0;
12029                                 break;
12030                         }
12031                 }
12032                 leaf = path.nodes[0];
12033                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12034                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12035                         path.slots[0]++;
12036                         continue;
12037                 }
12038
12039                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12040                 btrfs_add_block_group(fs_info, 0,
12041                                       btrfs_chunk_type(leaf, chunk),
12042                                       key.objectid, key.offset,
12043                                       btrfs_chunk_length(leaf, chunk));
12044                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12045                                  key.offset + btrfs_chunk_length(leaf, chunk));
12046                 path.slots[0]++;
12047         }
12048         start = 0;
12049         while (1) {
12050                 cache = btrfs_lookup_first_block_group(fs_info, start);
12051                 if (!cache)
12052                         break;
12053                 cache->cached = 1;
12054                 start = cache->key.objectid + cache->key.offset;
12055         }
12056
12057         btrfs_release_path(&path);
12058         return 0;
12059 }
12060
12061 static int reset_balance(struct btrfs_trans_handle *trans,
12062                          struct btrfs_fs_info *fs_info)
12063 {
12064         struct btrfs_root *root = fs_info->tree_root;
12065         struct btrfs_path path;
12066         struct extent_buffer *leaf;
12067         struct btrfs_key key;
12068         int del_slot, del_nr = 0;
12069         int ret;
12070         int found = 0;
12071
12072         btrfs_init_path(&path);
12073         key.objectid = BTRFS_BALANCE_OBJECTID;
12074         key.type = BTRFS_BALANCE_ITEM_KEY;
12075         key.offset = 0;
12076         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12077         if (ret) {
12078                 if (ret > 0)
12079                         ret = 0;
12080                 if (!ret)
12081                         goto reinit_data_reloc;
12082                 else
12083                         goto out;
12084         }
12085
12086         ret = btrfs_del_item(trans, root, &path);
12087         if (ret)
12088                 goto out;
12089         btrfs_release_path(&path);
12090
12091         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12092         key.type = BTRFS_ROOT_ITEM_KEY;
12093         key.offset = 0;
12094         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12095         if (ret < 0)
12096                 goto out;
12097         while (1) {
12098                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12099                         if (!found)
12100                                 break;
12101
12102                         if (del_nr) {
12103                                 ret = btrfs_del_items(trans, root, &path,
12104                                                       del_slot, del_nr);
12105                                 del_nr = 0;
12106                                 if (ret)
12107                                         goto out;
12108                         }
12109                         key.offset++;
12110                         btrfs_release_path(&path);
12111
12112                         found = 0;
12113                         ret = btrfs_search_slot(trans, root, &key, &path,
12114                                                 -1, 1);
12115                         if (ret < 0)
12116                                 goto out;
12117                         continue;
12118                 }
12119                 found = 1;
12120                 leaf = path.nodes[0];
12121                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12122                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12123                         break;
12124                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12125                         path.slots[0]++;
12126                         continue;
12127                 }
12128                 if (!del_nr) {
12129                         del_slot = path.slots[0];
12130                         del_nr = 1;
12131                 } else {
12132                         del_nr++;
12133                 }
12134                 path.slots[0]++;
12135         }
12136
12137         if (del_nr) {
12138                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12139                 if (ret)
12140                         goto out;
12141         }
12142         btrfs_release_path(&path);
12143
12144 reinit_data_reloc:
12145         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12146         key.type = BTRFS_ROOT_ITEM_KEY;
12147         key.offset = (u64)-1;
12148         root = btrfs_read_fs_root(fs_info, &key);
12149         if (IS_ERR(root)) {
12150                 fprintf(stderr, "Error reading data reloc tree\n");
12151                 ret = PTR_ERR(root);
12152                 goto out;
12153         }
12154         record_root_in_trans(trans, root);
12155         ret = btrfs_fsck_reinit_root(trans, root, 0);
12156         if (ret)
12157                 goto out;
12158         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12159 out:
12160         btrfs_release_path(&path);
12161         return ret;
12162 }
12163
12164 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12165                               struct btrfs_fs_info *fs_info)
12166 {
12167         u64 start = 0;
12168         int ret;
12169
12170         /*
12171          * The only reason we don't do this is because right now we're just
12172          * walking the trees we find and pinning down their bytes, we don't look
12173          * at any of the leaves.  In order to do mixed groups we'd have to check
12174          * the leaves of any fs roots and pin down the bytes for any file
12175          * extents we find.  Not hard but why do it if we don't have to?
12176          */
12177         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12178                 fprintf(stderr, "We don't support re-initing the extent tree "
12179                         "for mixed block groups yet, please notify a btrfs "
12180                         "developer you want to do this so they can add this "
12181                         "functionality.\n");
12182                 return -EINVAL;
12183         }
12184
12185         /*
12186          * first we need to walk all of the trees except the extent tree and pin
12187          * down the bytes that are in use so we don't overwrite any existing
12188          * metadata.
12189          */
12190         ret = pin_metadata_blocks(fs_info);
12191         if (ret) {
12192                 fprintf(stderr, "error pinning down used bytes\n");
12193                 return ret;
12194         }
12195
12196         /*
12197          * Need to drop all the block groups since we're going to recreate all
12198          * of them again.
12199          */
12200         btrfs_free_block_groups(fs_info);
12201         ret = reset_block_groups(fs_info);
12202         if (ret) {
12203                 fprintf(stderr, "error resetting the block groups\n");
12204                 return ret;
12205         }
12206
12207         /* Ok we can allocate now, reinit the extent root */
12208         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12209         if (ret) {
12210                 fprintf(stderr, "extent root initialization failed\n");
12211                 /*
12212                  * When the transaction code is updated we should end the
12213                  * transaction, but for now progs only knows about commit so
12214                  * just return an error.
12215                  */
12216                 return ret;
12217         }
12218
12219         /*
12220          * Now we have all the in-memory block groups setup so we can make
12221          * allocations properly, and the metadata we care about is safe since we
12222          * pinned all of it above.
12223          */
12224         while (1) {
12225                 struct btrfs_block_group_cache *cache;
12226
12227                 cache = btrfs_lookup_first_block_group(fs_info, start);
12228                 if (!cache)
12229                         break;
12230                 start = cache->key.objectid + cache->key.offset;
12231                 ret = btrfs_insert_item(trans, fs_info->extent_root,
12232                                         &cache->key, &cache->item,
12233                                         sizeof(cache->item));
12234                 if (ret) {
12235                         fprintf(stderr, "Error adding block group\n");
12236                         return ret;
12237                 }
12238                 btrfs_extent_post_op(trans, fs_info->extent_root);
12239         }
12240
12241         ret = reset_balance(trans, fs_info);
12242         if (ret)
12243                 fprintf(stderr, "error resetting the pending balance\n");
12244
12245         return ret;
12246 }
12247
12248 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12249 {
12250         struct btrfs_path path;
12251         struct btrfs_trans_handle *trans;
12252         struct btrfs_key key;
12253         int ret;
12254
12255         printf("Recowing metadata block %llu\n", eb->start);
12256         key.objectid = btrfs_header_owner(eb);
12257         key.type = BTRFS_ROOT_ITEM_KEY;
12258         key.offset = (u64)-1;
12259
12260         root = btrfs_read_fs_root(root->fs_info, &key);
12261         if (IS_ERR(root)) {
12262                 fprintf(stderr, "Couldn't find owner root %llu\n",
12263                         key.objectid);
12264                 return PTR_ERR(root);
12265         }
12266
12267         trans = btrfs_start_transaction(root, 1);
12268         if (IS_ERR(trans))
12269                 return PTR_ERR(trans);
12270
12271         btrfs_init_path(&path);
12272         path.lowest_level = btrfs_header_level(eb);
12273         if (path.lowest_level)
12274                 btrfs_node_key_to_cpu(eb, &key, 0);
12275         else
12276                 btrfs_item_key_to_cpu(eb, &key, 0);
12277
12278         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12279         btrfs_commit_transaction(trans, root);
12280         btrfs_release_path(&path);
12281         return ret;
12282 }
12283
12284 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12285 {
12286         struct btrfs_path path;
12287         struct btrfs_trans_handle *trans;
12288         struct btrfs_key key;
12289         int ret;
12290
12291         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12292                bad->key.type, bad->key.offset);
12293         key.objectid = bad->root_id;
12294         key.type = BTRFS_ROOT_ITEM_KEY;
12295         key.offset = (u64)-1;
12296
12297         root = btrfs_read_fs_root(root->fs_info, &key);
12298         if (IS_ERR(root)) {
12299                 fprintf(stderr, "Couldn't find owner root %llu\n",
12300                         key.objectid);
12301                 return PTR_ERR(root);
12302         }
12303
12304         trans = btrfs_start_transaction(root, 1);
12305         if (IS_ERR(trans))
12306                 return PTR_ERR(trans);
12307
12308         btrfs_init_path(&path);
12309         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12310         if (ret) {
12311                 if (ret > 0)
12312                         ret = 0;
12313                 goto out;
12314         }
12315         ret = btrfs_del_item(trans, root, &path);
12316 out:
12317         btrfs_commit_transaction(trans, root);
12318         btrfs_release_path(&path);
12319         return ret;
12320 }
12321
12322 static int zero_log_tree(struct btrfs_root *root)
12323 {
12324         struct btrfs_trans_handle *trans;
12325         int ret;
12326
12327         trans = btrfs_start_transaction(root, 1);
12328         if (IS_ERR(trans)) {
12329                 ret = PTR_ERR(trans);
12330                 return ret;
12331         }
12332         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12333         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12334         ret = btrfs_commit_transaction(trans, root);
12335         return ret;
12336 }
12337
12338 static int populate_csum(struct btrfs_trans_handle *trans,
12339                          struct btrfs_root *csum_root, char *buf, u64 start,
12340                          u64 len)
12341 {
12342         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12343         u64 offset = 0;
12344         u64 sectorsize;
12345         int ret = 0;
12346
12347         while (offset < len) {
12348                 sectorsize = fs_info->sectorsize;
12349                 ret = read_extent_data(fs_info, buf, start + offset,
12350                                        &sectorsize, 0);
12351                 if (ret)
12352                         break;
12353                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12354                                             start + offset, buf, sectorsize);
12355                 if (ret)
12356                         break;
12357                 offset += sectorsize;
12358         }
12359         return ret;
12360 }
12361
12362 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12363                                       struct btrfs_root *csum_root,
12364                                       struct btrfs_root *cur_root)
12365 {
12366         struct btrfs_path path;
12367         struct btrfs_key key;
12368         struct extent_buffer *node;
12369         struct btrfs_file_extent_item *fi;
12370         char *buf = NULL;
12371         u64 start = 0;
12372         u64 len = 0;
12373         int slot = 0;
12374         int ret = 0;
12375
12376         buf = malloc(cur_root->fs_info->sectorsize);
12377         if (!buf)
12378                 return -ENOMEM;
12379
12380         btrfs_init_path(&path);
12381         key.objectid = 0;
12382         key.offset = 0;
12383         key.type = 0;
12384         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12385         if (ret < 0)
12386                 goto out;
12387         /* Iterate all regular file extents and fill its csum */
12388         while (1) {
12389                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12390
12391                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12392                         goto next;
12393                 node = path.nodes[0];
12394                 slot = path.slots[0];
12395                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12396                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12397                         goto next;
12398                 start = btrfs_file_extent_disk_bytenr(node, fi);
12399                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12400
12401                 ret = populate_csum(trans, csum_root, buf, start, len);
12402                 if (ret == -EEXIST)
12403                         ret = 0;
12404                 if (ret < 0)
12405                         goto out;
12406 next:
12407                 /*
12408                  * TODO: if next leaf is corrupted, jump to nearest next valid
12409                  * leaf.
12410                  */
12411                 ret = btrfs_next_item(cur_root, &path);
12412                 if (ret < 0)
12413                         goto out;
12414                 if (ret > 0) {
12415                         ret = 0;
12416                         goto out;
12417                 }
12418         }
12419
12420 out:
12421         btrfs_release_path(&path);
12422         free(buf);
12423         return ret;
12424 }
12425
12426 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12427                                   struct btrfs_root *csum_root)
12428 {
12429         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12430         struct btrfs_path path;
12431         struct btrfs_root *tree_root = fs_info->tree_root;
12432         struct btrfs_root *cur_root;
12433         struct extent_buffer *node;
12434         struct btrfs_key key;
12435         int slot = 0;
12436         int ret = 0;
12437
12438         btrfs_init_path(&path);
12439         key.objectid = BTRFS_FS_TREE_OBJECTID;
12440         key.offset = 0;
12441         key.type = BTRFS_ROOT_ITEM_KEY;
12442         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12443         if (ret < 0)
12444                 goto out;
12445         if (ret > 0) {
12446                 ret = -ENOENT;
12447                 goto out;
12448         }
12449
12450         while (1) {
12451                 node = path.nodes[0];
12452                 slot = path.slots[0];
12453                 btrfs_item_key_to_cpu(node, &key, slot);
12454                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12455                         goto out;
12456                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12457                         goto next;
12458                 if (!is_fstree(key.objectid))
12459                         goto next;
12460                 key.offset = (u64)-1;
12461
12462                 cur_root = btrfs_read_fs_root(fs_info, &key);
12463                 if (IS_ERR(cur_root) || !cur_root) {
12464                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12465                                 key.objectid);
12466                         goto out;
12467                 }
12468                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12469                                 cur_root);
12470                 if (ret < 0)
12471                         goto out;
12472 next:
12473                 ret = btrfs_next_item(tree_root, &path);
12474                 if (ret > 0) {
12475                         ret = 0;
12476                         goto out;
12477                 }
12478                 if (ret < 0)
12479                         goto out;
12480         }
12481
12482 out:
12483         btrfs_release_path(&path);
12484         return ret;
12485 }
12486
12487 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12488                                       struct btrfs_root *csum_root)
12489 {
12490         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12491         struct btrfs_path path;
12492         struct btrfs_extent_item *ei;
12493         struct extent_buffer *leaf;
12494         char *buf;
12495         struct btrfs_key key;
12496         int ret;
12497
12498         btrfs_init_path(&path);
12499         key.objectid = 0;
12500         key.type = BTRFS_EXTENT_ITEM_KEY;
12501         key.offset = 0;
12502         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12503         if (ret < 0) {
12504                 btrfs_release_path(&path);
12505                 return ret;
12506         }
12507
12508         buf = malloc(csum_root->fs_info->sectorsize);
12509         if (!buf) {
12510                 btrfs_release_path(&path);
12511                 return -ENOMEM;
12512         }
12513
12514         while (1) {
12515                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12516                         ret = btrfs_next_leaf(extent_root, &path);
12517                         if (ret < 0)
12518                                 break;
12519                         if (ret) {
12520                                 ret = 0;
12521                                 break;
12522                         }
12523                 }
12524                 leaf = path.nodes[0];
12525
12526                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12527                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12528                         path.slots[0]++;
12529                         continue;
12530                 }
12531
12532                 ei = btrfs_item_ptr(leaf, path.slots[0],
12533                                     struct btrfs_extent_item);
12534                 if (!(btrfs_extent_flags(leaf, ei) &
12535                       BTRFS_EXTENT_FLAG_DATA)) {
12536                         path.slots[0]++;
12537                         continue;
12538                 }
12539
12540                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12541                                     key.offset);
12542                 if (ret)
12543                         break;
12544                 path.slots[0]++;
12545         }
12546
12547         btrfs_release_path(&path);
12548         free(buf);
12549         return ret;
12550 }
12551
/*
 * Recalculate the checksums and store them in the csum tree.
 *
 * An extent tree init wipes out all the extent info, so in that case the
 * extent tree cannot be relied upon and the fs trees must be used instead.
 * When @search_fs_tree is non-zero the fs/subvolume trees are the source of
 * the data extents, otherwise the extent tree is.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
12568
12569 static void free_roots_info_cache(void)
12570 {
12571         if (!roots_info_cache)
12572                 return;
12573
12574         while (!cache_tree_empty(roots_info_cache)) {
12575                 struct cache_extent *entry;
12576                 struct root_item_info *rii;
12577
12578                 entry = first_cache_extent(roots_info_cache);
12579                 if (!entry)
12580                         break;
12581                 remove_cache_extent(roots_info_cache, entry);
12582                 rii = container_of(entry, struct root_item_info, cache_extent);
12583                 free(rii);
12584         }
12585
12586         free(roots_info_cache);
12587         roots_info_cache = NULL;
12588 }
12589
12590 static int build_roots_info_cache(struct btrfs_fs_info *info)
12591 {
12592         int ret = 0;
12593         struct btrfs_key key;
12594         struct extent_buffer *leaf;
12595         struct btrfs_path path;
12596
12597         if (!roots_info_cache) {
12598                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12599                 if (!roots_info_cache)
12600                         return -ENOMEM;
12601                 cache_tree_init(roots_info_cache);
12602         }
12603
12604         btrfs_init_path(&path);
12605         key.objectid = 0;
12606         key.type = BTRFS_EXTENT_ITEM_KEY;
12607         key.offset = 0;
12608         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12609         if (ret < 0)
12610                 goto out;
12611         leaf = path.nodes[0];
12612
12613         while (1) {
12614                 struct btrfs_key found_key;
12615                 struct btrfs_extent_item *ei;
12616                 struct btrfs_extent_inline_ref *iref;
12617                 int slot = path.slots[0];
12618                 int type;
12619                 u64 flags;
12620                 u64 root_id;
12621                 u8 level;
12622                 struct cache_extent *entry;
12623                 struct root_item_info *rii;
12624
12625                 if (slot >= btrfs_header_nritems(leaf)) {
12626                         ret = btrfs_next_leaf(info->extent_root, &path);
12627                         if (ret < 0) {
12628                                 break;
12629                         } else if (ret) {
12630                                 ret = 0;
12631                                 break;
12632                         }
12633                         leaf = path.nodes[0];
12634                         slot = path.slots[0];
12635                 }
12636
12637                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12638
12639                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12640                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12641                         goto next;
12642
12643                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12644                 flags = btrfs_extent_flags(leaf, ei);
12645
12646                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12647                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12648                         goto next;
12649
12650                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12651                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12652                         level = found_key.offset;
12653                 } else {
12654                         struct btrfs_tree_block_info *binfo;
12655
12656                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12657                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12658                         level = btrfs_tree_block_level(leaf, binfo);
12659                 }
12660
12661                 /*
12662                  * For a root extent, it must be of the following type and the
12663                  * first (and only one) iref in the item.
12664                  */
12665                 type = btrfs_extent_inline_ref_type(leaf, iref);
12666                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12667                         goto next;
12668
12669                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12670                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12671                 if (!entry) {
12672                         rii = malloc(sizeof(struct root_item_info));
12673                         if (!rii) {
12674                                 ret = -ENOMEM;
12675                                 goto out;
12676                         }
12677                         rii->cache_extent.start = root_id;
12678                         rii->cache_extent.size = 1;
12679                         rii->level = (u8)-1;
12680                         entry = &rii->cache_extent;
12681                         ret = insert_cache_extent(roots_info_cache, entry);
12682                         ASSERT(ret == 0);
12683                 } else {
12684                         rii = container_of(entry, struct root_item_info,
12685                                            cache_extent);
12686                 }
12687
12688                 ASSERT(rii->cache_extent.start == root_id);
12689                 ASSERT(rii->cache_extent.size == 1);
12690
12691                 if (level > rii->level || rii->level == (u8)-1) {
12692                         rii->level = level;
12693                         rii->bytenr = found_key.objectid;
12694                         rii->gen = btrfs_extent_generation(leaf, ei);
12695                         rii->node_count = 1;
12696                 } else if (level == rii->level) {
12697                         rii->node_count++;
12698                 }
12699 next:
12700                 path.slots[0]++;
12701         }
12702
12703 out:
12704         btrfs_release_path(&path);
12705
12706         return ret;
12707 }
12708
12709 static int maybe_repair_root_item(struct btrfs_path *path,
12710                                   const struct btrfs_key *root_key,
12711                                   const int read_only_mode)
12712 {
12713         const u64 root_id = root_key->objectid;
12714         struct cache_extent *entry;
12715         struct root_item_info *rii;
12716         struct btrfs_root_item ri;
12717         unsigned long offset;
12718
12719         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12720         if (!entry) {
12721                 fprintf(stderr,
12722                         "Error: could not find extent items for root %llu\n",
12723                         root_key->objectid);
12724                 return -ENOENT;
12725         }
12726
12727         rii = container_of(entry, struct root_item_info, cache_extent);
12728         ASSERT(rii->cache_extent.start == root_id);
12729         ASSERT(rii->cache_extent.size == 1);
12730
12731         if (rii->node_count != 1) {
12732                 fprintf(stderr,
12733                         "Error: could not find btree root extent for root %llu\n",
12734                         root_id);
12735                 return -ENOENT;
12736         }
12737
12738         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12739         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12740
12741         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12742             btrfs_root_level(&ri) != rii->level ||
12743             btrfs_root_generation(&ri) != rii->gen) {
12744
12745                 /*
12746                  * If we're in repair mode but our caller told us to not update
12747                  * the root item, i.e. just check if it needs to be updated, don't
12748                  * print this message, since the caller will call us again shortly
12749                  * for the same root item without read only mode (the caller will
12750                  * open a transaction first).
12751                  */
12752                 if (!(read_only_mode && repair))
12753                         fprintf(stderr,
12754                                 "%sroot item for root %llu,"
12755                                 " current bytenr %llu, current gen %llu, current level %u,"
12756                                 " new bytenr %llu, new gen %llu, new level %u\n",
12757                                 (read_only_mode ? "" : "fixing "),
12758                                 root_id,
12759                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12760                                 btrfs_root_level(&ri),
12761                                 rii->bytenr, rii->gen, rii->level);
12762
12763                 if (btrfs_root_generation(&ri) > rii->gen) {
12764                         fprintf(stderr,
12765                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12766                                 root_id, btrfs_root_generation(&ri), rii->gen);
12767                         return -EINVAL;
12768                 }
12769
12770                 if (!read_only_mode) {
12771                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12772                         btrfs_set_root_level(&ri, rii->level);
12773                         btrfs_set_root_generation(&ri, rii->gen);
12774                         write_extent_buffer(path->nodes[0], &ri,
12775                                             offset, sizeof(ri));
12776                 }
12777
12778                 return 1;
12779         }
12780
12781         return 0;
12782 }
12783
12784 /*
12785  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12786  * caused read-only snapshots to be corrupted if they were created at a moment
12787  * when the source subvolume/snapshot had orphan items. The issue was that the
12788  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12789  * node instead of the post orphan cleanup root node.
12790  * So this function, and its callees, just detects and fixes those cases. Even
12791  * though the regression was for read-only snapshots, this function applies to
12792  * any snapshot/subvolume root.
12793  * This must be run before any other repair code - not doing it so, makes other
12794  * repair code delete or modify backrefs in the extent tree for example, which
12795  * will result in an inconsistent fs after repairing the root items.
12796  */
12797 static int repair_root_items(struct btrfs_fs_info *info)
12798 {
12799         struct btrfs_path path;
12800         struct btrfs_key key;
12801         struct extent_buffer *leaf;
12802         struct btrfs_trans_handle *trans = NULL;
12803         int ret = 0;
12804         int bad_roots = 0;
12805         int need_trans = 0;
12806
12807         btrfs_init_path(&path);
12808
12809         ret = build_roots_info_cache(info);
12810         if (ret)
12811                 goto out;
12812
12813         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12814         key.type = BTRFS_ROOT_ITEM_KEY;
12815         key.offset = 0;
12816
12817 again:
12818         /*
12819          * Avoid opening and committing transactions if a leaf doesn't have
12820          * any root items that need to be fixed, so that we avoid rotating
12821          * backup roots unnecessarily.
12822          */
12823         if (need_trans) {
12824                 trans = btrfs_start_transaction(info->tree_root, 1);
12825                 if (IS_ERR(trans)) {
12826                         ret = PTR_ERR(trans);
12827                         goto out;
12828                 }
12829         }
12830
12831         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12832                                 0, trans ? 1 : 0);
12833         if (ret < 0)
12834                 goto out;
12835         leaf = path.nodes[0];
12836
12837         while (1) {
12838                 struct btrfs_key found_key;
12839
12840                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12841                         int no_more_keys = find_next_key(&path, &key);
12842
12843                         btrfs_release_path(&path);
12844                         if (trans) {
12845                                 ret = btrfs_commit_transaction(trans,
12846                                                                info->tree_root);
12847                                 trans = NULL;
12848                                 if (ret < 0)
12849                                         goto out;
12850                         }
12851                         need_trans = 0;
12852                         if (no_more_keys)
12853                                 break;
12854                         goto again;
12855                 }
12856
12857                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12858
12859                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12860                         goto next;
12861                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12862                         goto next;
12863
12864                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12865                 if (ret < 0)
12866                         goto out;
12867                 if (ret) {
12868                         if (!trans && repair) {
12869                                 need_trans = 1;
12870                                 key = found_key;
12871                                 btrfs_release_path(&path);
12872                                 goto again;
12873                         }
12874                         bad_roots++;
12875                 }
12876 next:
12877                 path.slots[0]++;
12878         }
12879         ret = 0;
12880 out:
12881         free_roots_info_cache();
12882         btrfs_release_path(&path);
12883         if (trans)
12884                 btrfs_commit_transaction(trans, info->tree_root);
12885         if (ret < 0)
12886                 return ret;
12887
12888         return bad_roots;
12889 }
12890
12891 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12892 {
12893         struct btrfs_trans_handle *trans;
12894         struct btrfs_block_group_cache *bg_cache;
12895         u64 current = 0;
12896         int ret = 0;
12897
12898         /* Clear all free space cache inodes and its extent data */
12899         while (1) {
12900                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12901                 if (!bg_cache)
12902                         break;
12903                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12904                 if (ret < 0)
12905                         return ret;
12906                 current = bg_cache->key.objectid + bg_cache->key.offset;
12907         }
12908
12909         /* Don't forget to set cache_generation to -1 */
12910         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12911         if (IS_ERR(trans)) {
12912                 error("failed to update super block cache generation");
12913                 return PTR_ERR(trans);
12914         }
12915         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12916         btrfs_commit_transaction(trans, fs_info->tree_root);
12917
12918         return ret;
12919 }
12920
12921 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
12922                 int clear_version)
12923 {
12924         int ret = 0;
12925
12926         if (clear_version == 1) {
12927                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12928                         error(
12929                 "free space cache v2 detected, use --clear-space-cache v2");
12930                         ret = 1;
12931                         goto close_out;
12932                 }
12933                 printf("Clearing free space cache\n");
12934                 ret = clear_free_space_cache(fs_info);
12935                 if (ret) {
12936                         error("failed to clear free space cache");
12937                         ret = 1;
12938                 } else {
12939                         printf("Free space cache cleared\n");
12940                 }
12941         } else if (clear_version == 2) {
12942                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12943                         printf("no free space cache v2 to clear\n");
12944                         ret = 0;
12945                         goto close_out;
12946                 }
12947                 printf("Clear free space cache v2\n");
12948                 ret = btrfs_clear_free_space_tree(fs_info);
12949                 if (ret) {
12950                         error("failed to clear free space cache v2: %d", ret);
12951                         ret = 1;
12952                 } else {
12953                         printf("free space cache v2 cleared\n");
12954                 }
12955         }
12956 close_out:
12957         return ret;
12958 }
12959
/*
 * Help text for "btrfs check": a NULL-terminated array of strings that
 * cmd_check() passes to usage(). The entries are the synopsis, a one-line
 * summary, the longer description, an empty separator and then the option
 * lines.
 * NOTE(review): how usage() interprets the individual entries is not visible
 * here - confirm before reordering or merging lines.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        /* Terminator */
        NULL
};
12991
12992 int cmd_check(int argc, char **argv)
12993 {
12994         struct cache_tree root_cache;
12995         struct btrfs_root *root;
12996         struct btrfs_fs_info *info;
12997         u64 bytenr = 0;
12998         u64 subvolid = 0;
12999         u64 tree_root_bytenr = 0;
13000         u64 chunk_root_bytenr = 0;
13001         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13002         int ret = 0;
13003         int err = 0;
13004         u64 num;
13005         int init_csum_tree = 0;
13006         int readonly = 0;
13007         int clear_space_cache = 0;
13008         int qgroup_report = 0;
13009         int qgroups_repaired = 0;
13010         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13011         int force = 0;
13012
13013         while(1) {
13014                 int c;
13015                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13016                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13017                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13018                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13019                         GETOPT_VAL_FORCE };
13020                 static const struct option long_options[] = {
13021                         { "super", required_argument, NULL, 's' },
13022                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13023                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13024                         { "init-csum-tree", no_argument, NULL,
13025                                 GETOPT_VAL_INIT_CSUM },
13026                         { "init-extent-tree", no_argument, NULL,
13027                                 GETOPT_VAL_INIT_EXTENT },
13028                         { "check-data-csum", no_argument, NULL,
13029                                 GETOPT_VAL_CHECK_CSUM },
13030                         { "backup", no_argument, NULL, 'b' },
13031                         { "subvol-extents", required_argument, NULL, 'E' },
13032                         { "qgroup-report", no_argument, NULL, 'Q' },
13033                         { "tree-root", required_argument, NULL, 'r' },
13034                         { "chunk-root", required_argument, NULL,
13035                                 GETOPT_VAL_CHUNK_TREE },
13036                         { "progress", no_argument, NULL, 'p' },
13037                         { "mode", required_argument, NULL,
13038                                 GETOPT_VAL_MODE },
13039                         { "clear-space-cache", required_argument, NULL,
13040                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
13041                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13042                         { NULL, 0, NULL, 0}
13043                 };
13044
13045                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13046                 if (c < 0)
13047                         break;
13048                 switch(c) {
13049                         case 'a': /* ignored */ break;
13050                         case 'b':
13051                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13052                                 break;
13053                         case 's':
13054                                 num = arg_strtou64(optarg);
13055                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13056                                         error(
13057                                         "super mirror should be less than %d",
13058                                                 BTRFS_SUPER_MIRROR_MAX);
13059                                         exit(1);
13060                                 }
13061                                 bytenr = btrfs_sb_offset(((int)num));
13062                                 printf("using SB copy %llu, bytenr %llu\n", num,
13063                                        (unsigned long long)bytenr);
13064                                 break;
13065                         case 'Q':
13066                                 qgroup_report = 1;
13067                                 break;
13068                         case 'E':
13069                                 subvolid = arg_strtou64(optarg);
13070                                 break;
13071                         case 'r':
13072                                 tree_root_bytenr = arg_strtou64(optarg);
13073                                 break;
13074                         case GETOPT_VAL_CHUNK_TREE:
13075                                 chunk_root_bytenr = arg_strtou64(optarg);
13076                                 break;
13077                         case 'p':
13078                                 ctx.progress_enabled = true;
13079                                 break;
13080                         case '?':
13081                         case 'h':
13082                                 usage(cmd_check_usage);
13083                         case GETOPT_VAL_REPAIR:
13084                                 printf("enabling repair mode\n");
13085                                 repair = 1;
13086                                 ctree_flags |= OPEN_CTREE_WRITES;
13087                                 break;
13088                         case GETOPT_VAL_READONLY:
13089                                 readonly = 1;
13090                                 break;
13091                         case GETOPT_VAL_INIT_CSUM:
13092                                 printf("Creating a new CRC tree\n");
13093                                 init_csum_tree = 1;
13094                                 repair = 1;
13095                                 ctree_flags |= OPEN_CTREE_WRITES;
13096                                 break;
13097                         case GETOPT_VAL_INIT_EXTENT:
13098                                 init_extent_tree = 1;
13099                                 ctree_flags |= (OPEN_CTREE_WRITES |
13100                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
13101                                 repair = 1;
13102                                 break;
13103                         case GETOPT_VAL_CHECK_CSUM:
13104                                 check_data_csum = 1;
13105                                 break;
13106                         case GETOPT_VAL_MODE:
13107                                 check_mode = parse_check_mode(optarg);
13108                                 if (check_mode == CHECK_MODE_UNKNOWN) {
13109                                         error("unknown mode: %s", optarg);
13110                                         exit(1);
13111                                 }
13112                                 break;
13113                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
13114                                 if (strcmp(optarg, "v1") == 0) {
13115                                         clear_space_cache = 1;
13116                                 } else if (strcmp(optarg, "v2") == 0) {
13117                                         clear_space_cache = 2;
13118                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13119                                 } else {
13120                                         error(
13121                 "invalid argument to --clear-space-cache, must be v1 or v2");
13122                                         exit(1);
13123                                 }
13124                                 ctree_flags |= OPEN_CTREE_WRITES;
13125                                 break;
13126                         case GETOPT_VAL_FORCE:
13127                                 force = 1;
13128                                 break;
13129                 }
13130         }
13131
13132         if (check_argc_exact(argc - optind, 1))
13133                 usage(cmd_check_usage);
13134
13135         if (ctx.progress_enabled) {
13136                 ctx.tp = TASK_NOTHING;
13137                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13138         }
13139
13140         /* This check is the only reason for --readonly to exist */
13141         if (readonly && repair) {
13142                 error("repair options are not compatible with --readonly");
13143                 exit(1);
13144         }
13145
13146         /*
13147          * experimental and dangerous
13148          */
13149         if (repair && check_mode == CHECK_MODE_LOWMEM)
13150                 warning("low-memory mode repair support is only partial");
13151
13152         radix_tree_init();
13153         cache_tree_init(&root_cache);
13154
13155         ret = check_mounted(argv[optind]);
13156         if (!force) {
13157                 if (ret < 0) {
13158                         error("could not check mount status: %s",
13159                                         strerror(-ret));
13160                         err |= !!ret;
13161                         goto err_out;
13162                 } else if (ret) {
13163                         error(
13164 "%s is currently mounted, use --force if you really intend to check the filesystem",
13165                                 argv[optind]);
13166                         ret = -EBUSY;
13167                         err |= !!ret;
13168                         goto err_out;
13169                 }
13170         } else {
13171                 if (repair) {
13172                         error("repair and --force is not yet supported");
13173                         ret = 1;
13174                         err |= !!ret;
13175                         goto err_out;
13176                 }
13177                 if (ret < 0) {
13178                         warning(
13179 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13180                                 argv[optind]);
13181                 } else if (ret) {
13182                         warning(
13183                         "filesystem mounted, continuing because of --force");
13184                 }
13185                 /* A block device is mounted in exclusive mode by kernel */
13186                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13187         }
13188
13189         /* only allow partial opening under repair mode */
13190         if (repair)
13191                 ctree_flags |= OPEN_CTREE_PARTIAL;
13192
13193         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13194                                   chunk_root_bytenr, ctree_flags);
13195         if (!info) {
13196                 error("cannot open file system");
13197                 ret = -EIO;
13198                 err |= !!ret;
13199                 goto err_out;
13200         }
13201
13202         global_info = info;
13203         root = info->fs_root;
13204         uuid_unparse(info->super_copy->fsid, uuidbuf);
13205
13206         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13207
13208         /*
13209          * Check the bare minimum before starting anything else that could rely
13210          * on it, namely the tree roots, any local consistency checks
13211          */
13212         if (!extent_buffer_uptodate(info->tree_root->node) ||
13213             !extent_buffer_uptodate(info->dev_root->node) ||
13214             !extent_buffer_uptodate(info->chunk_root->node)) {
13215                 error("critical roots corrupted, unable to check the filesystem");
13216                 err |= !!ret;
13217                 ret = -EIO;
13218                 goto close_out;
13219         }
13220
13221         if (clear_space_cache) {
13222                 ret = do_clear_free_space_cache(info, clear_space_cache);
13223                 err |= !!ret;
13224                 goto close_out;
13225         }
13226
13227         /*
13228          * repair mode will force us to commit transaction which
13229          * will make us fail to load log tree when mounting.
13230          */
13231         if (repair && btrfs_super_log_root(info->super_copy)) {
13232                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13233                 if (!ret) {
13234                         ret = 1;
13235                         err |= !!ret;
13236                         goto close_out;
13237                 }
13238                 ret = zero_log_tree(root);
13239                 err |= !!ret;
13240                 if (ret) {
13241                         error("failed to zero log tree: %d", ret);
13242                         goto close_out;
13243                 }
13244         }
13245
13246         if (qgroup_report) {
13247                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13248                        uuidbuf);
13249                 ret = qgroup_verify_all(info);
13250                 err |= !!ret;
13251                 if (ret == 0)
13252                         report_qgroups(1);
13253                 goto close_out;
13254         }
13255         if (subvolid) {
13256                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13257                        subvolid, argv[optind], uuidbuf);
13258                 ret = print_extent_state(info, subvolid);
13259                 err |= !!ret;
13260                 goto close_out;
13261         }
13262
13263         if (init_extent_tree || init_csum_tree) {
13264                 struct btrfs_trans_handle *trans;
13265
13266                 trans = btrfs_start_transaction(info->extent_root, 0);
13267                 if (IS_ERR(trans)) {
13268                         error("error starting transaction");
13269                         ret = PTR_ERR(trans);
13270                         err |= !!ret;
13271                         goto close_out;
13272                 }
13273
13274                 if (init_extent_tree) {
13275                         printf("Creating a new extent tree\n");
13276                         ret = reinit_extent_tree(trans, info);
13277                         err |= !!ret;
13278                         if (ret)
13279                                 goto close_out;
13280                 }
13281
13282                 if (init_csum_tree) {
13283                         printf("Reinitialize checksum tree\n");
13284                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13285                         if (ret) {
13286                                 error("checksum tree initialization failed: %d",
13287                                                 ret);
13288                                 ret = -EIO;
13289                                 err |= !!ret;
13290                                 goto close_out;
13291                         }
13292
13293                         ret = fill_csum_tree(trans, info->csum_root,
13294                                              init_extent_tree);
13295                         err |= !!ret;
13296                         if (ret) {
13297                                 error("checksum tree refilling failed: %d", ret);
13298                                 return -EIO;
13299                         }
13300                 }
13301                 /*
13302                  * Ok now we commit and run the normal fsck, which will add
13303                  * extent entries for all of the items it finds.
13304                  */
13305                 ret = btrfs_commit_transaction(trans, info->extent_root);
13306                 err |= !!ret;
13307                 if (ret)
13308                         goto close_out;
13309         }
13310         if (!extent_buffer_uptodate(info->extent_root->node)) {
13311                 error("critical: extent_root, unable to check the filesystem");
13312                 ret = -EIO;
13313                 err |= !!ret;
13314                 goto close_out;
13315         }
13316         if (!extent_buffer_uptodate(info->csum_root->node)) {
13317                 error("critical: csum_root, unable to check the filesystem");
13318                 ret = -EIO;
13319                 err |= !!ret;
13320                 goto close_out;
13321         }
13322
13323         ret = do_check_chunks_and_extents(info);
13324         err |= !!ret;
13325         if (ret)
13326                 error(
13327                 "errors found in extent allocation tree or chunk allocation");
13328
13329         ret = repair_root_items(info);
13330         err |= !!ret;
13331         if (ret < 0) {
13332                 error("failed to repair root items: %s", strerror(-ret));
13333                 goto close_out;
13334         }
13335         if (repair) {
13336                 fprintf(stderr, "Fixed %d roots.\n", ret);
13337                 ret = 0;
13338         } else if (ret > 0) {
13339                 fprintf(stderr,
13340                        "Found %d roots with an outdated root item.\n",
13341                        ret);
13342                 fprintf(stderr,
13343                         "Please run a filesystem check with the option --repair to fix them.\n");
13344                 ret = 1;
13345                 err |= !!ret;
13346                 goto close_out;
13347         }
13348
13349         if (!ctx.progress_enabled) {
13350                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13351                         fprintf(stderr, "checking free space tree\n");
13352                 else
13353                         fprintf(stderr, "checking free space cache\n");
13354         }
13355         ret = check_space_cache(root);
13356         err |= !!ret;
13357         if (ret) {
13358                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13359                         error("errors found in free space tree");
13360                 else
13361                         error("errors found in free space cache");
13362                 goto out;
13363         }
13364
13365         /*
13366          * We used to have to have these hole extents in between our real
13367          * extents so if we don't have this flag set we need to make sure there
13368          * are no gaps in the file extents for inodes, otherwise we can just
13369          * ignore it when this happens.
13370          */
13371         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13372         ret = do_check_fs_roots(info, &root_cache);
13373         err |= !!ret;
13374         if (ret) {
13375                 error("errors found in fs roots");
13376                 goto out;
13377         }
13378
13379         fprintf(stderr, "checking csums\n");
13380         ret = check_csums(root);
13381         err |= !!ret;
13382         if (ret) {
13383                 error("errors found in csum tree");
13384                 goto out;
13385         }
13386
13387         fprintf(stderr, "checking root refs\n");
13388         /* For low memory mode, check_fs_roots_v2 handles root refs */
13389         if (check_mode != CHECK_MODE_LOWMEM) {
13390                 ret = check_root_refs(root, &root_cache);
13391                 err |= !!ret;
13392                 if (ret) {
13393                         error("errors found in root refs");
13394                         goto out;
13395                 }
13396         }
13397
13398         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13399                 struct extent_buffer *eb;
13400
13401                 eb = list_first_entry(&root->fs_info->recow_ebs,
13402                                       struct extent_buffer, recow);
13403                 list_del_init(&eb->recow);
13404                 ret = recow_extent_buffer(root, eb);
13405                 err |= !!ret;
13406                 if (ret) {
13407                         error("fails to fix transid errors");
13408                         break;
13409                 }
13410         }
13411
13412         while (!list_empty(&delete_items)) {
13413                 struct bad_item *bad;
13414
13415                 bad = list_first_entry(&delete_items, struct bad_item, list);
13416                 list_del_init(&bad->list);
13417                 if (repair) {
13418                         ret = delete_bad_item(root, bad);
13419                         err |= !!ret;
13420                 }
13421                 free(bad);
13422         }
13423
13424         if (info->quota_enabled) {
13425                 fprintf(stderr, "checking quota groups\n");
13426                 ret = qgroup_verify_all(info);
13427                 err |= !!ret;
13428                 if (ret) {
13429                         error("failed to check quota groups");
13430                         goto out;
13431                 }
13432                 report_qgroups(0);
13433                 ret = repair_qgroups(info, &qgroups_repaired);
13434                 err |= !!ret;
13435                 if (err) {
13436                         error("failed to repair quota groups");
13437                         goto out;
13438                 }
13439                 ret = 0;
13440         }
13441
13442         if (!list_empty(&root->fs_info->recow_ebs)) {
13443                 error("transid errors in file system");
13444                 ret = 1;
13445                 err |= !!ret;
13446         }
13447 out:
13448         printf("found %llu bytes used, ",
13449                (unsigned long long)bytes_used);
13450         if (err)
13451                 printf("error(s) found\n");
13452         else
13453                 printf("no error found\n");
13454         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13455         printf("total tree bytes: %llu\n",
13456                (unsigned long long)total_btree_bytes);
13457         printf("total fs tree bytes: %llu\n",
13458                (unsigned long long)total_fs_tree_bytes);
13459         printf("total extent tree bytes: %llu\n",
13460                (unsigned long long)total_extent_tree_bytes);
13461         printf("btree space waste bytes: %llu\n",
13462                (unsigned long long)btree_space_waste);
13463         printf("file data blocks allocated: %llu\n referenced %llu\n",
13464                 (unsigned long long)data_bytes_allocated,
13465                 (unsigned long long)data_bytes_referenced);
13466
13467         free_qgroup_counts();
13468         free_root_recs_tree(&root_cache);
13469 close_out:
13470         close_ctree(root);
13471 err_out:
13472         if (ctx.progress_enabled)
13473                 task_deinit(ctx.info);
13474
13475         return err;
13476 }